From 27484c5dfba345b516d37eb39b87e1a0d9977407 Mon Sep 17 00:00:00 2001 From: Nassim Oufattole Date: Sat, 14 Dec 2024 16:25:29 -0800 Subject: [PATCH 1/8] moved demo to the root project directory, should not be in src. Additionally added a stage for meds-transform mimiciv and eicu demo processing --- demo/aces.ipynb | 279 ++ demo/configs/event_configs.yaml | 595 ++++ demo/configs/extract_MIMIC.yaml | 42 + demo/configs/extract_eICU.yaml | 43 + demo/configs/table_preprocessors.yaml | 283 ++ demo/extract_meds_data.ipynb | 4467 +++++++++++++++++++++++++ demo/meds_cehrbert.py | 397 +++ demo/meds_tab.ipynb | 285 ++ demo/meds_tab.py | 240 ++ 9 files changed, 6631 insertions(+) create mode 100644 demo/aces.ipynb create mode 100644 demo/configs/event_configs.yaml create mode 100644 demo/configs/extract_MIMIC.yaml create mode 100644 demo/configs/extract_eICU.yaml create mode 100644 demo/configs/table_preprocessors.yaml create mode 100644 demo/extract_meds_data.ipynb create mode 100644 demo/meds_cehrbert.py create mode 100644 demo/meds_tab.ipynb create mode 100644 demo/meds_tab.py diff --git a/demo/aces.ipynb b/demo/aces.ipynb new file mode 100644 index 0000000..8d03cbf --- /dev/null +++ b/demo/aces.ipynb @@ -0,0 +1,279 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "PZmYRVX2W8m7" + }, + "source": [ + "# Using an example MEDS tool, ACES for labeling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "id": "z3_pG9YAWpKy", + "outputId": "efa4c286-413d-4a91-a53d-fb41769cd4f2" + }, + "outputs": [], + "source": [ + "#@title Install ACES\n", + "\n", + "\n", + "!pip install es-aces" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ROOT_DIR = \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H6fqe217XDhi" + }, + "outputs": [], + "source": [ + 
"# From the ACES documentation\n", + "\n", + "task_config = \"\"\"\n", + "description: >-\n", + " This file specifies the base configuration for the prediction of a hospital los being greater than 3days,\n", + " leveraging only the first 48 hours of data after admission, with a 24 hour gap between the input window\n", + " and the target window. Patients who die or are discharged in the gap window are excluded. Note that this\n", + " task is in-**hospital** los, not in-**ICU** los which is a different task.\n", + "\n", + "predicates:\n", + " hospital_admission:\n", + " code: {regex: \"HOSPITAL_ADMISSION//.*\"}\n", + " hospital_discharge:\n", + " code: {regex: \"HOSPITAL_DISCHARGE//.*\"}\n", + " death:\n", + " code: MEDS_DEATH\n", + " discharge_or_death:\n", + " expr: or(hospital_discharge, death)\n", + "\n", + "trigger: hospital_admission\n", + "\n", + "windows:\n", + " input:\n", + " start: NULL\n", + " end: trigger + 48h\n", + " start_inclusive: True\n", + " end_inclusive: True\n", + " index_timestamp: end\n", + " gap:\n", + " start: input.end\n", + " end: start + 24h\n", + " start_inclusive: False\n", + " end_inclusive: True\n", + " has:\n", + " hospital_admission: (None, 0)\n", + " discharge_or_death: (None, 0)\n", + " target:\n", + " start: trigger\n", + " end: start + 3d\n", + " start_inclusive: False\n", + " end_inclusive: True\n", + " label: discharge_or_death\n", + "\"\"\"\n", + "!mkdir /content/tasks/ -p\n", + "TASK_NAME = \"in_hospital_3d_los_after_48h\"\n", + "TASK_CONFIG_FP = f\"/content/tasks/{TASK_NAME}.yaml\"\n", + "with open(TASK_CONFIG_FP, 'w') as f:\n", + " f.write(task_config)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bXLiJGEry-Gb", + "outputId": "7d954ab4-cf5c-4d02-a99c-669b5822bf44" + }, + "outputs": [], + "source": [ + "!aces-cli --multirun data=sharded data.standard=meds data.root=\"$MIMICIV_MEDS_DIR/data\" \"data.shard=$(expand_shards 
/content/meds/data/)\" cohort_dir=\" /content/tasks\" cohort_name=\"$TASK_NAME\" config_path=\"$TASK_CONFIG_FP\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 497 + }, + "id": "7Vvac7DIWyRT", + "outputId": "40493f0e-48ba-4f5e-9d9a-401e26f1a9b7" + }, + "outputs": [], + "source": [ + "import polars as pl\n", + "\n", + "# execute query and get results\n", + "df = pl.read_parquet(f\"/content/tasks/{TASK_NAME}/**/*.parquet\")\n", + "\n", + "print(\"train prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/train/*.parquet\")['boolean_value'].mean(), 3)))\n", + "print(\"tuning prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/tuning/*.parquet\")['boolean_value'].mean(), 3)))\n", + "print(\"held_out prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/held_out/*.parquet\")['boolean_value'].mean(), 3)))\n", + "\n", + "\n", + "df.sort('boolean_value')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "XWB7O1UGhRIo", + "outputId": "e3416d5e-7427-4cf4-c0ab-20053a9d3430" + }, + "outputs": [], + "source": [ + "#@title Install meds-tab\n", + "\n", + "!pip uninstall es-aces -y\n", + "!pip install meds-tab" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SeGawIqli0nn" + }, + "outputs": [], + "source": [ + "MIMICIV_MEDS_DIR = \"/content/meds/\"\n", + "OUTPUT_TABULARIZATION_DIR=\"/content/tabularized/\"\n", + "TASK_DIR=\"/content/tasks/\"\n", + "TASK_NAME=\"in_hospital_3d_los_after_48h\"\n", + "OUTPUT_MODEL_DIR=\"/content/output/meds_tab/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Tud0_0cgjljP", + "outputId": "fb3417e0-3ba4-4f9a-ab95-ce3ba8731ca1" + }, + "outputs": 
[], + "source": [ + "!meds-tab-describe input_dir={MIMICIV_MEDS_DIR}/data output_dir={OUTPUT_TABULARIZATION_DIR}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RVLBdOn1mnV5" + }, + "outputs": [], + "source": [ + "# Define the window sizes and aggregations to generate features for\n", + "WINDOW_SIZES = \"tabularization.window_sizes=[1d,30d,365d]\"\n", + "AGGREGATIONS = \"tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KhCPqBmduNYK" + }, + "outputs": [], + "source": [ + "!rm -rf /content/tabularized/tabularize/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p_D07KzxjVUl", + "outputId": "8836b076-cf64-4f29-da81-ac5125ab7608" + }, + "outputs": [], + "source": [ + "!meds-tab-tabularize-static \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u-e-mV2Hk-Qf", + "outputId": "c292be12-ff74-44e4-f039-758e10ccc909" + }, + "outputs": [], + "source": [ + "!meds-tab-tabularize-time-series --multirun \"worker=range(0,2)\" \"hydra/launcher=joblib\" \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NmaR_-Fik4eH" + }, + "outputs": [], + "source": [ + "!meds-tab-cache-task \"input_dir={MIMICIV_MEDS_DIR}/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" \"input_label_dir=$TASK_DIR/$TASK_NAME/\" \"task_name=$TASK_NAME\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": { + "id": "dLIkOzTblBB2" + }, + "outputs": [], + "source": [ + "!meds-tab-xgboost --multirun \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" \"output_model_dir=$OUTPUT_MODEL_DIR/$TASK_NAME/\" \"task_name=$TASK_NAME\" do_overwrite=False \"hydra.sweeper.n_trials=10\" $WINDOW_SIZES $AGGREGATIONS \"tabularization.min_code_inclusion_count=10\"" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/demo/configs/event_configs.yaml b/demo/configs/event_configs.yaml new file mode 100644 index 0000000..3a270d1 --- /dev/null +++ b/demo/configs/event_configs.yaml @@ -0,0 +1,595 @@ +# Note that there is no "subject_id" for eICU -- patients are only differentiable during the course of a +# single health system stay. Accordingly, we set the "patient" id here as the "patientHealthSystemStayID" + +subject_id_col: patienthealthsystemstayid + +patient: + dob: + code: "MEDS_BIRTH" # This is the MEDS official code for BIRTH. 
+ time: col(dateofbirth) + uniquepid: "uniquepid" + gender: + code: ["GENDER", "col(gender)"] + time: null + ethnicity: + code: ["ETHNICITY", "col(ethnicity)"] + time: null + hosp_admission: + code: + - "HOSPITAL_ADMISSION" + - col(hospitaladmitsource) + - col(hospitalregion) + - col(hospitalteachingstatus) + - col(hospitalnumbedscategory) + time: col(hospitaladmittimestamp) + hospital_id: "hospitalid" + hosp_discharge: + code: + - "HOSPITAL_DISCHARGE" + - col(hospitaldischargestatus) + - col(hospitaldischargelocation) + time: col(hospitaldischargetimestamp) + unit_admission: + code: + - "UNIT_ADMISSION" + - col(unitadmitsource) + - col(unitstaytype) + time: col(unitadmittimestamp) + ward_id: "wardid" + unit_stay_id: "patientunitstayid" + unit_admission_weight: + code: + - "UNIT_ADMISSION_WEIGHT" + time: col(unitadmittimestamp) + numeric_value: "unitadmissionweight" + unit_admission_height: + code: + - "UNIT_ADMISSION_HEIGHT" + time: col(unitadmittimestamp) + numeric_value: "unitadmissionheight" + unit_discharge: + code: + - "UNIT_DISCHARGE" + - col(unitdischargestatus) + - col(unitdischargelocation) + time: col(unitdischargetimestamp) + unit_discharge_weight: + code: + - "UNIT_DISCHARGE_WEIGHT" + time: col(unitdischargetimestamp) + numeric_value: "unitdischargeweight" + +admissionDx: + admission_diagnosis: + code: + - "ADMISSION_DX" + - col(admitdxname) + time: col(admitDxEnteredTimestamp) + admission_dx_id: "admissiondxid" + unit_stay_id: "patientunitstayid" + +allergy: + allergy: + code: + - "ALLERGY" + - col(allergytype) + - col(allergyname) + time: col(allergyEnteredTimestamp) + +carePlanGeneral: + cplItem: + code: + - "CAREPLAN_GENERAL" + - col(cplgroup) + - col(cplitemvalue) + time: col(carePlanGeneralItemEnteredTimestamp) + +carePlanEOL: + cplEolDiscussion: + code: + - "CAREPLAN_EOL" + time: col(carePlanEolDiscussionOccurredTimestamp) + +carePlanGoal: + cplGoal: + code: + - "CAREPLAN_GOAL" + - col(cplgoalcategory) + - col(cplgoalvalue) + - 
col(cplgoalstatus) + time: col(carePlanGoalEnteredTimestamp) + +carePlanInfectiousDisease: + cplInfectDisease: + code: + - "CAREPLAN_INFECTIOUS_DISEASE" + - col(infectdiseasesite) + - col(infectdiseaseassessment) + - col(treatment) + - col(responsetotherapy) + time: col(carePlanInfectDiseaseEnteredTimestamp) + +diagnosis: + diagnosis: + code: + - "ICD9CM" + - col(icd9code) + - col(diagnosispriority) + time: col(diagnosisEnteredTimestamp) + diagnosis_string: "diagnosisstring" + +lab: + lab: + code: + - "LAB" + - col(labmeasurenamesystem) + - col(labmeasurenameinterface) + - col(labname) + time: col(labResultDrawnTimestamp) + numeric_value: "labresult" + text_value: "labresulttext" + lab_type_id: "labtypeid" + +medication: + drug_ordered: + code: + - "MEDICATION" + - "ORDERED" + - col(drugname) + time: col(drugordertimestamp) + medication_id: "medicationid" + drug_iv_admixture: "drugivadmixture" + dosage: "dosage" + route_admin: "routeadmin" + frequency: "frequency" + loading_dose: "loadingdose" + prn: "prn" + gtc: "gtc" + drug_started: + code: + - "MEDICATION" + - "STARTED" + - col(drugname) + time: col(drugstarttimestamp) + medication_id: "medicationid" + drug_stopped: + code: + - "MEDICATION" + - "STOPPED" + - col(drugname) + time: col(drugstoptimestamp) + medication_id: "medicationid" + +nurseAssessment: + nurse_assessment_performed: + code: + - "NURSE_ASSESSMENT" + - "PERFORMED" + - NOT YET DONE + time: col(nurseAssessPerformedTimestamp) + nurse_assessment_id: "nurseassessid" + cell_label: "celllabel" + cell_attribute: "cellattribute" + cell_attribute_value: "cellattributevalue" + + nurse_assessment_entered: + code: + - "NURSE_ASSESSMENT" + - "ENTERED" + - NOT YET DONE + time: col(nurseAssessEnteredTimestamp) + nurse_assessment_id: "nurseassessid" + cell_label: "celllabel" + cell_attribute: "cellattribute" + cell_attribute_value: "cellattributevalue" + +nurseCare: + nurse_care_performed: + code: + - "NURSE_CARE" + - "PERFORMED" + - NOT YET DONE + time: 
col(nurseCarePerformedTimestamp) + nurse_care_id: "nursecareid" + cell_label: "celllabel" + cell_attribute: "cellattribute" + cell_attribute_value: "cellattributevalue" + + nurse_care_entered: + code: + - "NURSE_CARE" + - "ENTERED" + - NOT YET DONE + time: col(nurseCareEnteredTimestamp) + nurse_care_id: "nursecareid" + cell_label: "celllabel" + cell_attribute: "cellattribute" + cell_attribute_value: "cellattributevalue" + +nurseCharting: + nurse_charting_performed: + code: + - "NURSE_CHARTING" + - "PERFORMED" + - NOT YET DONE + time: col(nursingChartPerformedTimestamp) + nurse_charting_id: "nursingchartid" + cell_type_cat: "nursingchartcelltypecat" + cell_type_val_name: "nursingchartcelltypevalname" + cell_type_val_label: "nursingchartcelltypevallabel" + cell_value: "nursingchartvalue" + + nurse_charting_entered: + code: + - "NURSE_CHARTING" + - "ENTERED" + - NOT YET DONE + time: col(nursingChartEnteredTimestamp) + nurse_charting_id: "nursingchartid" + cell_type_cat: "nursingchartcelltypecat" + cell_type_val_name: "nursingchartcelltypevalname" + cell_type_val_label: "nursingchartcelltypevallabel" + cell_value: "nursingchartvalue" + +pastHistory: + past_history_taken: + code: + - "PAST_HISTORY" + - "TAKEN" + - NOT YET DONE + time: col(pastHistoryTakenTimestamp) + past_history_id: "pasthistoryid" + note_type: "pasthistorynotetype" + path: "pasthistorypath" + value: "pasthistoryvalue" + value_text: "pasthistoryvaluetext" + + past_history_entered: + code: + - "PAST_HISTORY" + - "ENTERED" + - NOT YET DONE + time: col(pastHistoryEnteredTimestamp) + past_history_id: "pasthistoryid" + note_type: "pasthistorynotetype" + path: "pasthistorypath" + value: "pasthistoryvalue" + value_text: "pasthistoryvaluetext" + +physicalExam: + physical_exam_entered: + code: + - "PHYSICAL_EXAM" + - "ENTERED" + - NOT YET DONE + time: col(physicalExamEnteredTimestamp) + physical_exam_id: "physicalexamid" + text: "physicalexamtext" + path: "physicalexampath" + value: "physicalexamvalue" + 
+respiratoryCare: + resp_care_status: + code: + - "RESP_CARE" + - "STATUS" + - NOT YET DONE + time: col(respCareStatusEnteredTimestamp) + resp_care_id: "respcareid" + airwaytype: "airwaytype" + airwaysize: "airwaysize" + airwayposition: "airwayposition" + cuffpressure: "cuffpressure" + lowexhmvlimit: "lowexhmvlimit" + hiexhmvlimit: "hiexhmvlimit" + lowexhtvlimit: "lowexhtvlimit" + hipeakpreslimit: "hipeakpreslimit" + lowpeakpreslimit: "lowpeakpreslimit" + hirespratelimit: "hirespratelimit" + lowrespratelimit: "lowrespratelimit" + sighpreslimit: "sighpreslimit" + lowironoxlimit: "lowironoxlimit" + highironoxlimit: "highironoxlimit" + meanairwaypreslimit: "meanairwaypreslimit" + peeplimit: "peeplimit" + cpaplimit: "cpaplimit" + setapneainterval: "setapneainterval" + setapneatv: "setapneatv" + setapneaippeephigh: "setapneaippeephigh" + setapnearr: "setapnearr" + setapneapeakflow: "setapneapeakflow" + setapneainsptime: "setapneainsptime" + setapneaie: "setapneaie" + setapneafio2: "setapneafio2" + + vent_start: + code: + - "VENT" + - "START" + - NOT YET DONE + time: col(ventStartTimestamp) + resp_care_id: "respcareid" + + vent_end: + code: + - "VENT" + - "END" + - NOT YET DONE + time: col(ventEndTimestamp) + resp_care_id: "respcareid" + +respiratoryCharting: + resp_charting_performed: + code: + - "RESP_CHARTING" + - "PERFORMED" + - NOT YET DONE + time: col(respChartPerformedTimestamp) + resp_chart_id: "respchartid" + type_cat: "respcharttypecat" + value_label: "respchartvaluelabel" + value: "respchartvalue" + + resp_charting_entered: + code: + - "RESP_CHARTING" + - "ENTERED" + - NOT YET DONE + time: col(respChartEnteredTimestamp) + resp_chart_id: "respchartid" + type_cat: "respcharttypecat" + value_label: "respchartvaluelabel" + value: "respchartvalue" + +treatment: + treatment: + code: + - "TREATMENT" + - "ENTERED" + - col(treatmentstring) + time: col(treatmentEnteredTimestamp) + treatment_id: "treatmentid" + +vitalAperiodic: + non_invasive_systolic: + code: + - 
"VITALS" + - "APERIODIC" + - "BP" + - "NONINVASIVE_SYSTOLIC" + time: col(observationEnteredTimestamp) + vital_id: "vitalaperiodicid" + numeric_value: "noninvasivesystolic" + non_invasive_diastolic: + code: + - "VITALS" + - "APERIODIC" + - "BP" + - "NONINVASIVE_DIASTOLIC" + time: col(observationEnteredTimestamp) + vital_id: "vitalaperiodicid" + numeric_value: "noninvasivediastolic" + + non_invasive_mean: + code: + - "VITALS" + - "APERIODIC" + - "BP" + - "NONINVASIVE_MEAN" + time: col(observationEnteredTimestamp) + vital_id: "vitalaperiodicid" + numeric_value: "noninvasivemean" + + paop: + code: + - "VITALS" + - "APERIODIC" + - "PAOP" + time: col(observationEnteredTimestamp) + vital_id: "vitalaperiodicid" + numeric_value: "paop" + + cardiac_output: + code: + - "VITALS" + - "APERIODIC" + - "CARDIAC_OUTPUT" + time: col(observationEnteredTimestamp) + vital_id: "vitalaperiodicid" + numeric_value: "cardiacoutput" + + cardiac_input: + code: + - "VITALS" + - "APERIODIC" + - "CARDIAC_INPUT" + time: col(observationEnteredTimestamp) + vital_id: "vitalaperiodicid" + numeric_value: "cardiacinput" + + svr: + code: + - "VITALS" + - "APERIODIC" + - "SVR" + time: col(observationEnteredTimestamp) + vital_id: "vitalaperiodicid" + numeric_value: "svr" + + svri: + code: + - "VITALS" + - "APERIODIC" + - "SVRI" + time: col(observationEnteredTimestamp) + vital_id: "vitalaperiodicid" + numeric_value: "svri" + + pvr: + code: + - "VITALS" + - "APERIODIC" + - "PVR" + time: col(observationEnteredTimestamp) + vital_id: "vitalaperiodicid" + numeric_value: "pvr" + + pvri: + code: + - "VITALS" + - "APERIODIC" + - "PVRI" + time: col(observationEnteredTimestamp) + vital_id: "vitalaperiodicid" + numeric_value: "pvri" + +vitalPeriodic: + temperature: + code: + - "VITALS" + - "PERIODIC" + - "TEMPERATURE" + time: col(observationEnteredTimestamp) + vital_id: "vitalperiodicid" + numeric_value: "temperature" + + saO2: + code: + - "VITALS" + - "PERIODIC" + - "SAO2" + time: col(observationEnteredTimestamp) + 
vital_id: "vitalperiodicid" + numeric_value: "sao2" + + heartRate: + code: + - "VITALS" + - "PERIODIC" + - "HEARTRATE" + time: col(observationEnteredTimestamp) + vital_id: "vitalperiodicid" + numeric_value: "heartrate" + + respiration: + code: + - "VITALS" + - "PERIODIC" + - "RESPIRATION" + time: col(observationEnteredTimestamp) + vital_id: "vitalperiodicid" + numeric_value: "respiration" + + cvp: + code: + - "VITALS" + - "PERIODIC" + - "CVP" + time: col(observationEnteredTimestamp) + vital_id: "vitalperiodicid" + numeric_value: "cvp" + + etCo2: + code: + - "VITALS" + - "PERIODIC" + - "ETCO2" + time: col(observationEnteredTimestamp) + vital_id: "vitalperiodicid" + numeric_value: "etco2" + + systemic_systolic: + code: + - "VITALS" + - "PERIODIC" + - "BP" + - "SYSTEMIC_SYSTOLIC" + time: col(observationEnteredTimestamp) + vital_id: "vitalperiodicid" + numeric_value: "systemicsystolic" + + systemic_diastolic: + code: + - "VITALS" + - "PERIODIC" + - "BP" + - "SYSTEMIC_DIASTOLIC" + time: col(observationEnteredTimestamp) + vital_id: "vitalperiodicid" + numeric_value: "systemicdiastolic" + + systemic_mean: + code: + - "VITALS" + - "PERIODIC" + - "BP" + - "SYSTEMIC_MEAN" + time: col(observationEnteredTimestamp) + vital_id: "vitalperiodicid" + numeric_value: "systemicmean" + + pa_systolic: + code: + - "VITALS" + - "PERIODIC" + - "BP" + - "PULM_ART_SYSTOLIC" + time: col(observationEnteredTimestamp) + vital_id: "vitalperiodicid" + numeric_value: "pasystolic" + + pa_diastolic: + code: + - "VITALS" + - "PERIODIC" + - "BP" + - "PULM_ART_DIASTOLIC" + time: col(observationEnteredTimestamp) + vital_id: "vitalperiodicid" + numeric_value: "padiastolic" + + pa_mean: + code: + - "VITALS" + - "PERIODIC" + - "BP" + - "PULM_ART_MEAN" + time: col(observationEnteredTimestamp) + vital_id: "vitalperiodicid" + numeric_value: "pamean" + + st1: + code: + - "VITALS" + - "PERIODIC" + - "ST1" + time: col(observationEnteredTimestamp) + vital_id: "vitalperiodicid" + numeric_value: "st1" + + st2: + 
code: + - "VITALS" + - "PERIODIC" + - "ST2" + time: col(observationEnteredTimestamp) + vital_id: "vitalperiodicid" + numeric_value: "st2" + + st3: + code: + - "VITALS" + - "PERIODIC" + - "ST3" + time: col(observationEnteredTimestamp) + vital_id: "vitalperiodicid" + numeric_value: "st3" + + ICP: + code: + - "VITALS" + - "PERIODIC" + - "ICP" + time: col(observationEnteredTimestamp) + vital_id: "vitalperiodicid" + numeric_value: "icp" diff --git a/demo/configs/extract_MIMIC.yaml b/demo/configs/extract_MIMIC.yaml new file mode 100644 index 0000000..53577f5 --- /dev/null +++ b/demo/configs/extract_MIMIC.yaml @@ -0,0 +1,42 @@ +defaults: + - _extract + - _self_ + +description: |- + This pipeline extracts the MIMIC-IV dataset in longitudinal, sparse form from an input dataset meeting + select criteria and converts them to the flattened, MEDS format. You can control the key arguments to this + pipeline by setting environment variables: + ```bash + export EVENT_CONVERSION_CONFIG_FP=# Path to your event conversion config + export MIMICIV_PRE_MEDS_DIR=# Path to the output dir of the pre-MEDS step + export MIMICIV_MEDS_COHORT_DIR=# Path to where you want the dataset to live + ``` + +# The event conversion configuration file is used throughout the pipeline to define the events to extract. 
+event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP} + +input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR} +cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR} + +etl_metadata: + dataset_name: MIMIC-IV + dataset_version: 2.2 + +stage_configs: + shard_events: + infer_schema_length: 999999999 + split_and_shard_subjects: + n_subjects_per_shard: 1000 + split_fracs: + train: 0.5 + tuning: 0.25 + held_out: 0.25 + +stages: + - shard_events + - split_and_shard_subjects + - convert_to_sharded_events + - merge_to_MEDS_cohort + - extract_code_metadata + - finalize_MEDS_metadata + - finalize_MEDS_data diff --git a/demo/configs/extract_eICU.yaml b/demo/configs/extract_eICU.yaml new file mode 100644 index 0000000..66516d0 --- /dev/null +++ b/demo/configs/extract_eICU.yaml @@ -0,0 +1,43 @@ +defaults: + - _extract + - _self_ + +description: |- + This pipeline extracts the eICU dataset in longitudinal, sparse form from an input dataset meeting + select criteria and converts them to the flattened, MEDS format. You can control the key arguments to this + pipeline by setting environment variables: + ```bash + export EVENT_CONVERSION_CONFIG_FP=# Path to your event conversion config + export EICU_PRE_MEDS_DIR=# Path to the output dir of the pre-MEDS step + export EICU_MEDS_COHORT_DIR=# Path to where you want the dataset to live + ``` + +# The event conversion configuration file is used throughout the pipeline to define the events to extract. 
+event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP} + +input_dir: ${oc.env:EICU_PRE_MEDS_DIR} +cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR} + +etl_metadata: + dataset_name: eICU + dataset_version: 2.0 + +stage_configs: + shard_events: + infer_schema_length: 999999999 + split_and_shard_subjects: + n_subjects_per_shard: 10000 + split_fracs: + train: 0.5 + tuning: 0.25 + held_out: 0.25 + merge_to_MEDS_cohort: + unique_by: null + +stages: + - shard_events + - split_and_shard_subjects + - convert_to_sharded_events + - merge_to_MEDS_cohort + - finalize_MEDS_metadata + - finalize_MEDS_data diff --git a/demo/configs/table_preprocessors.yaml b/demo/configs/table_preprocessors.yaml new file mode 100644 index 0000000..2e15180 --- /dev/null +++ b/demo/configs/table_preprocessors.yaml @@ -0,0 +1,283 @@ +admissionDx: + offset_col: "admitdxenteredoffset" + pseudotime_col: "admitDxEnteredTimestamp" + output_data_cols: ["admitdxname", "admissiondxid"] + warning_items: + - "How should we use `admitdxtest`?" + - "How should we use `admitdxpath`?" + +allergy: + offset_col: "allergyenteredoffset" + pseudotime_col: "allergyEnteredTimestamp" + output_data_cols: ["allergytype", "allergyname"] + warning_items: + - "How should we use `allergyNoteType`?" + - "How should we use `specialtyType`?" + - "How should we use `userType`?" + - >- + Is `drugName` the name of the drug to which the patient is allergic or the drug given to the patient + (docs say 'name of the selected admission drug')? + +carePlanGeneral: + offset_col: "cplitemoffset" + pseudotime_col: "carePlanGeneralItemEnteredTimestamp" + output_data_cols: ["cplgroup", "cplitemvalue"] + +carePlanEOL: + offset_col: "cpleoldiscussionoffset" + pseudotime_col: "carePlanEolDiscussionOccurredTimestamp" + warning_items: + - "Is the DiscussionOffset time actually reliable? Should we fall back on the SaveOffset time?" 
+ +carePlanGoal: + offset_col: "cplgoaloffset" + pseudotime_col: "carePlanGoalEnteredTimestamp" + output_data_cols: ["cplgoalcategory", "cplgoalvalue", "cplgoalstatus"] + +carePlanInfectiousDisease: + offset_col: "cplinfectdiseaseoffset" + pseudotime_col: "carePlanInfectDiseaseEnteredTimestamp" + output_data_cols: + - "infectdiseasesite" + - "infectdiseaseassessment" + - "responsetotherapy" + - "treatment" + +diagnosis: + offset_col: "diagnosisoffset" + pseudotime_col: "diagnosisEnteredTimestamp" + output_data_cols: ["icd9code", "diagnosispriority", "diagnosisstring"] + warning_items: + - "Though we use it, the `diagnosisString` field documentation is unclear -- by what is it separated?" + +infusionDrug: + offset_col: "infusionoffset" + pseudotime_col: "infusionEnteredTimestamp" + output_data_cols: + - "infusiondrugid" + - "drugname" + - "drugrate" + - "infusionrate" + - "drugamount" + - "volumeoffluid" + - "patientweight" + +lab: + offset_col: "labresultoffset" + pseudotime_col: "labResultDrawnTimestamp" + output_data_cols: + - "labname" + - "labresult" + - "labresulttext" + - "labmeasurenamesystem" + - "labmeasurenameinterface" + - "labtypeid" + warning_items: + - "Is this the time the lab was drawn? Entered? The time the result came in?" + - "We **IGNORE** the `labResultRevisedOffset` column -- this may be a mistake!" + +medication: + offset_col: + - "drugorderoffset" + - "drugstartoffset" + - "drugstopoffset" + pseudotime_col: + - "drugordertimestamp" + - "drugstarttimestamp" + - "drugstoptimestamp" + output_data_cols: + - "medicationid" + - "drugivadmixture" + - "drugname" + - "drughiclseqno" + - "dosage" + - "routeadmin" + - "frequency" + - "loadingdose" + - "prn" + - "gtc" + warning_items: + - "We **IGNORE** the `drugOrderCancelled` column -- this may be a mistake!" 
+ +nurseAssessment: + offset_col: + - "nurseassessoffset" + - "nurseassessentryoffset" + pseudotime_col: + - "nurseAssessPerformedTimestamp" + - "nurseAssessEnteredTimestamp" + output_data_cols: + - "nurseassessid" + - "celllabel" + - "cellattribute" + - "cellattributevalue" + warning_items: + - "Should we be using `cellAttributePath` instead of `cellAttribute`?" + - "SOME MAY BE LISTS" + +nurseCare: + offset_col: + - "nursecareoffset" + - "nursecareentryoffset" + pseudotime_col: + - "nurseCarePerformedTimestamp" + - "nurseCareEnteredTimestamp" + output_data_cols: + - "nursecareid" + - "celllabel" + - "cellattribute" + - "cellattributevalue" + warning_items: + - "Should we be using `cellAttributePath` instead of `cellAttribute`?" + - "SOME MAY BE LISTS" + +nurseCharting: + offset_col: + - "nursingchartoffset" + - "nursingchartentryoffset" + pseudotime_col: + - "nursingChartPerformedTimestamp" + - "nursingChartEnteredTimestamp" + output_data_cols: + - "nursingchartid" + - "nursingchartcelltypecat" + - "nursingchartcelltypevalname" + - "nursingchartcelltypevallabel" + - "nursingchartvalue" + warning_items: + - "SOME MAY BE LISTS" + +pastHistory: + offset_col: + - "pasthistoryoffset" + - "pasthistoryenteredoffset" + pseudotime_col: + - "pastHistoryTakenTimestamp" + - "pastHistoryEnteredTimestamp" + output_data_cols: + - "pasthistoryid" + - "pasthistorynotetype" + - "pasthistorypath" + - "pasthistoryvalue" + - "pasthistoryvaluetext" + warning_items: + - "SOME MAY BE LISTS" + - "How should we use `pastHistoryPath` vs. `pastHistoryNoteType`?" + - "How should we use `pastHistoryValue` vs. `pastHistoryValueText`?" + +physicalExam: + offset_col: "physicalexamoffset" + pseudotime_col: "physicalExamEnteredTimestamp" + output_data_cols: + - "physicalexamid" + - "physicalexamtext" + - "physicalexampath" + - "physicalexamvalue" + warning_items: + - "How should we use `physicalExamValue` vs. `physicalExamText`?" + - "I believe the `physicalExamValue` is a **LIST**. 
This must be processed specially." + +respiratoryCare: + offset_col: + - "respcarestatusoffset" + - "ventstartoffset" + - "ventendoffset" + pseudotime_col: + - "respCareStatusEnteredTimestamp" + - "ventStartTimestamp" + - "ventEndTimestamp" + output_data_cols: + - "respcareid" + - "airwaytype" + - "airwaysize" + - "airwayposition" + - "cuffpressure" + - "lowexhmvlimit" + - "hiexhmvlimit" + - "lowexhtvlimit" + - "hipeakpreslimit" + - "lowpeakpreslimit" + - "hirespratelimit" + - "lowrespratelimit" + - "sighpreslimit" + - "lowironoxlimit" + - "highironoxlimit" + - "meanairwaypreslimit" + - "peeplimit" + - "cpaplimit" + - "setapneainterval" + - "setapneatv" + - "setapneaippeephigh" + - "setapnearr" + - "setapneapeakflow" + - "setapneainsptime" + - "setapneaie" + - "setapneafio2" + warning_items: + - "We ignore the `priorVent*` columns -- this may be a mistake!" + - "There is a lot of data in this table -- what should be incorporated into the event structure?" + - "We might be able to use `priorVent` timestamps to further refine true season of unit admission." 
+ +respiratoryCharting: + offset_col: + - "respchartoffset" + - "respchartentryoffset" + pseudotime_col: + - "respChartPerformedTimestamp" + - "respChartEnteredTimestamp" + output_data_cols: + - "respchartid" + - "respcharttypecat" + - "respchartvaluelabel" + - "respchartvalue" + warning_items: + - "SOME MAY BE LISTS" + +treatment: + offset_col: "treatmentoffset" + pseudotime_col: "treatmentEnteredTimestamp" + output_data_cols: + - "treatmentid" + - "treatmentstring" + warning_items: + - "Absence of entries in the table does not indicate absence of treatments" + +vitalAperiodic: + offset_col: "observationoffset" + pseudotime_col: "observationEnteredTimestamp" + output_data_cols: + - "vitalaperiodicid" + - "noninvasivesystolic" + - "noninvasivediastolic" + - "noninvasivemean" + - "paop" + - "cardiacoutput" + - "cardiacinput" + - "svr" + - "svri" + - "pvr" + - "pvri" + +vitalPeriodic: + offset_col: "observationoffset" + pseudotime_col: "observationEnteredTimestamp" + output_data_cols: + - "vitalperiodicid" + - "temperature" + - "sao2" + - "heartrate" + - "respiration" + - "cvp" + - "etco2" + - "systemicsystolic" + - "systemicdiastolic" + - "systemicmean" + - "pasystolic" + - "padiastolic" + - "pamean" + - "st1" + - "st2" + - "st3" + - "icp" + warning_items: + - "These are 5-minute median values. There are going to be a *lot* of events." 
diff --git a/demo/extract_meds_data.ipynb b/demo/extract_meds_data.ipynb new file mode 100644 index 0000000..55a228f --- /dev/null +++ b/demo/extract_meds_data.ipynb @@ -0,0 +1,4467 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "ikPVQZOnPcI0" + }, + "outputs": [], + "source": [ + "#@title Install dependencies\n", + "!pip -q install meds_etl==0.3.6 meds_transforms==0.0.9 es-aces==0.6.1" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": true, + "id": "rjqK4CuRPfnE" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/\n" + ] + } + ], + "source": [ + "#@title Download MIMIC-IV demo\n", + "import tempfile\n", + "import os\n", + "from pathlib import Path\n", + "temp_dir = tempfile.TemporaryDirectory()\n", + "notebook_dir = os.getcwd()\n", + "\n", + "ROOT_DIR=f\"{notebook_dir}/work_dir/mimiciv_demo/\"\n", + "Path(ROOT_DIR).mkdir(parents=True, exist_ok=True)\n", + "!echo {ROOT_DIR}" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "!wget -q -r -N -c --no-host-directories --cut-dirs=1 -np -P {ROOT_DIR}/raw_data https://physionet.org/files/mimic-iv-demo/2.2/" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qHOBI1_5StBb", + "outputId": "eb0ef7ec-54c8-4cac-b1ff-d176c986a447" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cloning into '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tmp'...\n", + "remote: Enumerating objects: 144, done.\u001b[K\n", + "remote: Counting objects: 100% (144/144), done.\u001b[K\n", + "remote: Compressing objects: 100% (129/129), done.\u001b[K\n", + "remote: Total 144 (delta 22), reused 69 (delta 7), pack-reused 0 (from 0)\u001b[K\n", + "Receiving 
objects: 100% (144/144), 211.41 KiB | 979.00 KiB/s, done.\n", + "Resolving deltas: 100% (22/22), done.\n", + "usage: cp [-R [-H | -L | -P]] [-fi | -n] [-aclpSsvXx] source_file target_file\n", + " cp [-R [-H | -L | -P]] [-fi | -n] [-aclpSsvXx] source_file ... target_directory\n" + ] + } + ], + "source": [ + "# Download pre-meds script, event config (defining how raw data is converted to meds data), and meds-transform config\n", + "!mkdir {ROOT_DIR}/meds-transform/\n", + "!git clone --depth 1 https://github.com/mmcdermott/MEDS_transforms.git {ROOT_DIR}/tmp/\n", + "!mv {ROOT_DIR}/tmp/MIMIC-IV_Example {ROOT_DIR}/MIMIC-IV_Example\n", + "!cp {ROOT_DIR}/MIMIC-IV_Example/" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Sr2QdvNxpd0p", + "outputId": "7877300f-afc5-4583-95f2-e4f7089356b6" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2024-12-14 16:06:48-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/d_labitems_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 361048 (353K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/d_labitems_to_loinc.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 352.59K 664KB/s in 0.5s \n", + "\n", + "2024-12-14 16:06:49 (664 KB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/d_labitems_to_loinc.csv’ saved [361048/361048]\n", + "\n", + "--2024-12-14 16:06:49-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/d_labitems_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 361048 (353K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//d_labitems_to_loinc.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 352.59K 942KB/s in 0.4s \n", + "\n", + "2024-12-14 16:06:49 (942 KB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//d_labitems_to_loinc.csv’ saved [361048/361048]\n", + "\n", + "--2024-12-14 16:06:50-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/inputevents_to_rxnorm.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 79195 (77K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/inputevents_to_rxnorm.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 77.34K --.-KB/s in 0.009s \n", + "\n", + "2024-12-14 16:06:50 (8.44 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/inputevents_to_rxnorm.csv’ saved [79195/79195]\n", + "\n", + "--2024-12-14 16:06:51-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/inputevents_to_rxnorm.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 79195 (77K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//inputevents_to_rxnorm.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 77.34K --.-KB/s in 0.008s \n", + "\n", + "2024-12-14 16:06:51 (9.16 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//inputevents_to_rxnorm.csv’ saved [79195/79195]\n", + "\n", + "--2024-12-14 16:06:51-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/lab_itemid_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 79970 (78K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/lab_itemid_to_loinc.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 78.10K --.-KB/s in 0.01s \n", + "\n", + "2024-12-14 16:06:51 (6.67 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/lab_itemid_to_loinc.csv’ saved [79970/79970]\n", + "\n", + "--2024-12-14 16:06:52-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/lab_itemid_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 79970 (78K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//lab_itemid_to_loinc.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 78.10K --.-KB/s in 0.007s \n", + "\n", + "2024-12-14 16:06:52 (11.4 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//lab_itemid_to_loinc.csv’ saved [79970/79970]\n", + "\n", + "--2024-12-14 16:06:52-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_main.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 34862 (34K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_main.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 34.04K --.-KB/s in 0.003s \n", + "\n", + "2024-12-14 16:06:52 (10.1 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_main.csv’ saved [34862/34862]\n", + "\n", + "--2024-12-14 16:06:53-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_main.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 34862 (34K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_main.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 34.04K --.-KB/s in 0.002s \n", + "\n", + "2024-12-14 16:06:53 (17.4 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_main.csv’ saved [34862/34862]\n", + "\n", + "--2024-12-14 16:06:53-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_value.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 5902 (5.8K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_value.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 5.76K --.-KB/s in 0.001s \n", + "\n", + "2024-12-14 16:06:54 (8.35 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_value.csv’ saved [5902/5902]\n", + "\n", + "--2024-12-14 16:06:54-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_value.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 5902 (5.8K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_value.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 5.76K --.-KB/s in 0s \n", + "\n", + "2024-12-14 16:06:54 (49.4 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_value.csv’ saved [5902/5902]\n", + "\n", + "--2024-12-14 16:06:54-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/numerics-summary.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 32353 (32K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/numerics-summary.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 31.59K --.-KB/s in 0.001s \n", + "\n", + "2024-12-14 16:06:55 (27.5 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/numerics-summary.csv’ saved [32353/32353]\n", + "\n", + "--2024-12-14 16:06:55-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/numerics-summary.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 32353 (32K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//numerics-summary.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 31.59K --.-KB/s in 0.001s \n", + "\n", + "2024-12-14 16:06:55 (61.1 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//numerics-summary.csv’ saved [32353/32353]\n", + "\n", + "--2024-12-14 16:06:56-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/outputevents_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 34008 (33K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/outputevents_to_loinc.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 33.21K --.-KB/s in 0.001s \n", + "\n", + "2024-12-14 16:06:56 (22.2 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/outputevents_to_loinc.csv’ saved [34008/34008]\n", + "\n", + "--2024-12-14 16:06:56-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/outputevents_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 34008 (33K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//outputevents_to_loinc.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 33.21K --.-KB/s in 0s \n", + "\n", + "2024-12-14 16:06:56 (133 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//outputevents_to_loinc.csv’ saved [34008/34008]\n", + "\n", + "--2024-12-14 16:06:57-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_datetimeevents.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 25205 (25K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_datetimeevents.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 24.61K --.-KB/s in 0.001s \n", + "\n", + "2024-12-14 16:06:57 (28.1 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_datetimeevents.csv’ saved [25205/25205]\n", + "\n", + "--2024-12-14 16:06:57-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_datetimeevents.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 25205 (25K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_datetimeevents.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 24.61K --.-KB/s in 0.006s \n", + "\n", + "2024-12-14 16:06:57 (3.71 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_datetimeevents.csv’ saved [25205/25205]\n", + "\n", + "--2024-12-14 16:06:58-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_itemid.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 21414 (21K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_itemid.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 20.91K --.-KB/s in 0.004s \n", + "\n", + "2024-12-14 16:06:58 (4.98 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_itemid.csv’ saved [21414/21414]\n", + "\n", + "--2024-12-14 16:06:59-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_itemid.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 21414 (21K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_itemid.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 20.91K --.-KB/s in 0.002s \n", + "\n", + "2024-12-14 16:06:59 (10.4 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_itemid.csv’ saved [21414/21414]\n", + "\n", + "--2024-12-14 16:06:59-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/waveforms-summary.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 5743 (5.6K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/waveforms-summary.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 5.61K --.-KB/s in 0s \n", + "\n", + "2024-12-14 16:06:59 (30.4 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/waveforms-summary.csv’ saved [5743/5743]\n", + "\n", + "--2024-12-14 16:07:00-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/waveforms-summary.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 5743 (5.6K) [text/plain]\n", + "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//waveforms-summary.csv’\n", + "\n", + "/Users/sim/Document 100%[===================>] 5.61K --.-KB/s in 0s \n", + "\n", + "2024-12-14 16:07:00 (37.3 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//waveforms-summary.csv’ saved [5743/5743]\n", + "\n" + ] + } + ], + "source": [ + "# download MIMIC IV metadata\n", + "MIMICIV_RAW_DIR = \"https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map\"\n", + "MIMICIV_PRE_MEDS_DIR = f\"{ROOT_DIR}/pre_meds/\"\n", + "!mkdir {MIMICIV_PRE_MEDS_DIR}\n", + "\n", + "OUTPUT_DIR = f\"{ROOT_DIR}/raw_data/mimic-iv-demo/2.2\"\n", + "\n", + "files = [\n", + " 'd_labitems_to_loinc.csv',\n", + " 'inputevents_to_rxnorm.csv',\n", + " 'lab_itemid_to_loinc.csv',\n", + " 'meas_chartevents_main.csv',\n", + " 'meas_chartevents_value.csv',\n", + " 'numerics-summary.csv',\n", + " 'outputevents_to_loinc.csv',\n", + " 'proc_datetimeevents.csv',\n", + " 
'proc_itemid.csv',\n", + " 'waveforms-summary.csv'\n", + "]\n", + "\n", + "for file in files:\n", + " !wget -O {OUTPUT_DIR}/{file} {MIMICIV_RAW_DIR}/{file}\n", + " !wget -O {MIMICIV_PRE_MEDS_DIR}/{file} {MIMICIV_RAW_DIR}/{file}" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "!cp configs/extract_MIMIC.yaml {ROOT_DIR}/MIMIC-IV_Example/configs/." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pQSLxYJhRPxm", + "outputId": "41ab56f5-512c-4489-adfc-614644c6c632" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//MIMIC-IV_Example\n", + "Unsetting SLURM_CPU_BIND in case you're running this on a slurm interactive node with slurm parallelism\n", + "Setting DO_UNZIP=true\n", + "Unzipping csv.gz files matching /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/*/*.csv.gz.\n", + "Running pre-MEDS conversion.\n", + "\u001b[32m2024-12-14 16:07:02.391\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe_detail.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/poe_detail: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe_detail.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe_detail.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.392\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/provider.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.392\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/provider: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/provider.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/provider.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.393\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/pharmacy: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m243\u001b[0m - \u001b[1mSkipping hosp/index @ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/index.html as no compatible dataframe file was found.\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.393\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/emar: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.394\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/microbiologyevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/microbiologyevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/microbiologyevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/microbiologyevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.395\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - 
\u001b[1mNo function needed for hosp/labevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.395\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/admissions: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.396\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_labitems.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/d_labitems: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_labitems.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_labitems.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.396\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/prescriptions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/prescriptions: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/prescriptions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/prescriptions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.397\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/procedures_icd: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.398\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.398\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/poe: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.398\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/d_hcpcs: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.399\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/omr: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.399\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/transfers: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.400\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/diagnoses_icd.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.400\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/services.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/services: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/services.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/services.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.401\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/hcpcsevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.401\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/drgcodes.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.402\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/patients.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.402\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_diagnoses.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.402\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_procedures.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.403\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar_detail.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/emar_detail: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar_detail.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar_detail.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m243\u001b[0m - \u001b[1mSkipping icu/index @ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/index.html as no compatible dataframe file was found.\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.404\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/d_items.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/d_items: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/d_items.csv to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/d_items.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.404\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/procedureevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.405\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/inputevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.406\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/datetimeevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.406\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/datetimeevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/datetimeevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/datetimeevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.406\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/ingredientevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/ingredientevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/ingredientevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/ingredientevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.407\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/chartevents: Symlinking 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.408\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/caregiver.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/caregiver: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/caregiver.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/caregiver.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.408\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/outputevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.409\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - 
\u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/icustays: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.409\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m300\u001b[0m - \u001b[1mLoading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv for manipulating other dataframes...\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m305\u001b[0m - \u001b[1m Loaded in 0:00:00.001291\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/diagnoses_icd...\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/diagnoses_icd.csv...\u001b[0m\n", + 
"\u001b[32m2024-12-14 16:07:02.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000166\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet in 0:00:00.068729\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/drgcodes...\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/drgcodes.csv...\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000264\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet in 0:00:00.004873\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/patients...\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/patients.csv...\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000171\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet in 0:00:00.007117\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.492\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_diagnoses.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m334\u001b[0m - \u001b[1mProcessing hosp/d_icd_diagnoses...\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet in 0:00:00.087729\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.580\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_procedures.csv\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.580\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m334\u001b[0m - \u001b[1mProcessing hosp/d_icd_procedures...\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.633\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet in 0:00:00.052624\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:02.633\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mDone! All dataframes processed and written to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds\u001b[0m\n", + "Setting N_WORKERS to 1 to avoid issues with the runners.\n", + "Running extraction pipeline.\n", + "\u001b[32m2024-12-14 16:07:03.167\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: shard_events\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:03.187\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-shard_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=shard_events\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:06.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:06.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 16:07:03.746 | INFO | MEDS_transforms.extract.shard_events:main:330 - 
Running with config:\n", + "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", + "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- shard_events\n", + "- split_and_shard_subjects\n", + "- convert_to_sharded_events\n", + "- merge_to_MEDS_cohort\n", + "- extract_code_metadata\n", + "- finalize_MEDS_metadata\n", + "- finalize_MEDS_data\n", + "stage_configs:\n", + " shard_events:\n", + " row_chunksize: 200000000\n", + " infer_schema_length: 999999999\n", + " data_input_dir: ${input_dir}\n", + " split_and_shard_subjects:\n", + " is_metadata: true\n", + " output_dir: ${cohort_dir}/metadata\n", + " n_subjects_per_shard: 1000\n", + " external_splits_json_fp: null\n", + " split_fracs:\n", + " train: 0.8\n", + " tuning: 0.1\n", + " held_out: 0.1\n", + " convert_to_sharded_events:\n", + " do_dedup_text_and_numeric: true\n", + " merge_to_MEDS_cohort:\n", + " unique_by: '*'\n", + " additional_sort_by: null\n", + " extract_code_metadata:\n", + " is_metadata: true\n", + " description_separator: '\n", + "\n", + " '\n", + " finalize_MEDS_metadata:\n", + " is_metadata: true\n", + " do_retype: true\n", + " finalize_MEDS_data:\n", + " do_retype: true\n", + " split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: shard_events\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: MIMIC-IV\n", + " dataset_version: 2.2\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: extract\n", + "description: \"This pipeline extracts 
the MIMIC-IV dataset in longitudinal, sparse\\\n", + " \\ form from an input dataset meeting\\nselect criteria and converts them to the flattened,\\\n", + " \\ MEDS format. You can control the key arguments to this\\npipeline by setting environment\\\n", + " \\ variables:\\n```bash\\n export EVENT_CONVERSION_CONFIG_FP=# Path to your event\\\n", + " \\ conversion config\\n export MIMICIV_PRE_MEDS_DIR=# Path to the output dir of the\\\n", + " \\ pre-MEDS step\\n export MIMICIV_MEDS_COHORT_DIR=# Path to where you want the dataset\\\n", + " \\ to live\\n```\"\n", + "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", + "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", + "\n", + "Stage: shard_events\n", + "\n", + "Stage config:\n", + "row_chunksize: 200000000\n", + "infer_schema_length: 999999999\n", + "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds/\n", + "is_metadata: false\n", + "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/metadata\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", + "reducer_output_dir: null\n", + "\n", + "2024-12-14 16:07:03.751 | INFO | MEDS_transforms.extract.shard_events:main:342 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml to identify needed columns.\n", + "2024-12-14 16:07:03.781 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.782 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet as it is not specified in the event conversion 
configuration.\n", + "2024-12-14 16:07:03.782 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/microbiologyevents.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/caregiver.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/services.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar_detail.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/numerics-summary.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/prescriptions.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/meas_chartevents_main.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.785 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe_detail.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.785 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/ingredientevents.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.785 | WARNING | 
MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/datetimeevents.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.785 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.785 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_labitems.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.785 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/meas_chartevents_value.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.786 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/d_items.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.786 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/provider.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.786 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/lab_itemid_to_loinc.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 16:07:03.786 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/waveforms-summary.csv as it is not specified in the 
event conversion configuration.\n", + "2024-12-14 16:07:03.787 | INFO | MEDS_transforms.extract.shard_events:main:368 - Starting event sub-sharding. Sub-sharding 16 files:\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv\n", + " * 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv\n", + "2024-12-14 16:07:03.790 | INFO | MEDS_transforms.extract.shard_events:main:372 - Will read raw data from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/$IN_FILE.parquet and write sub-sharded data to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/$IN_FILE/$ROW_START-$ROW_END.parquet\n", + "2024-12-14 16:07:03.792 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents.\n", + "2024-12-14 16:07:03.792 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv to determine row count.\n", + "2024-12-14 16:07:03.794 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:03.795 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valuenum', 'valueuom']\n", + "2024-12-14 16:07:04.301 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valuenum, valueuom\n", + "2024-12-14 16:07:04.337 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 668862 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv.\n", + "2024-12-14 16:07:04.337 | INFO | 
MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 16:07:04.337 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv row-chunk [0-668862) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet.\n", + "2024-12-14 16:07:04.343 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:04.343324. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:04.345 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv\n", + "2024-12-14 16:07:04.345 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:04.345 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valuenum', 'valueuom']\n", + "2024-12-14 16:07:05.156 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valuenum, valueuom\n", + "2024-12-14 16:07:05.156 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:05.156 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + 
"2024-12-14 16:07:05.364 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:01.021167\n", + "2024-12-14 16:07:05.364 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T16:07:04.343324.json\n", + "2024-12-14 16:07:05.367 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar.\n", + "2024-12-14 16:07:05.367 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv to determine row count.\n", + "2024-12-14 16:07:05.369 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.370 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'emar_id', 'emar_seq', 'event_txt', 'hadm_id', 'medication', 'subject_id']\n", + "2024-12-14 16:07:05.418 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, emar_id, emar_seq, event_txt, hadm_id, medication, subject_id\n", + "2024-12-14 16:07:05.422 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 35835 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv.\n", + 
"2024-12-14 16:07:05.422 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 16:07:05.422 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv row-chunk [0-35835) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet.\n", + "2024-12-14 16:07:05.425 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.425385. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:05.425 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv\n", + "2024-12-14 16:07:05.425 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.425 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'emar_id', 'emar_seq', 'event_txt', 'hadm_id', 'medication', 'subject_id']\n", + "2024-12-14 16:07:05.475 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, emar_id, emar_seq, event_txt, hadm_id, medication, subject_id\n", + "2024-12-14 16:07:05.475 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:05.475 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 
16:07:05.514 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.089048\n", + "2024-12-14 16:07:05.514 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T16:07:05.425385.json\n", + "2024-12-14 16:07:05.517 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions.\n", + "2024-12-14 16:07:05.517 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv to determine row count.\n", + "2024-12-14 16:07:05.519 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.519 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admission_location', 'admission_type', 'admittime', 'discharge_location', 'dischtime', 'edouttime', 'edregtime', 'hadm_id', 'insurance', 'language', 'marital_status', 'race', 'subject_id']\n", + "2024-12-14 16:07:05.520 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admission_location, admission_type, admittime, discharge_location, dischtime, edouttime, edregtime, hadm_id, insurance, language, marital_status, race, subject_id\n", + "2024-12-14 16:07:05.521 | 
INFO | MEDS_transforms.extract.shard_events:main:405 - Read 275 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv.\n", + "2024-12-14 16:07:05.521 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 16:07:05.521 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv row-chunk [0-275) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet.\n", + "2024-12-14 16:07:05.523 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.523672. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:05.524 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv\n", + "2024-12-14 16:07:05.524 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.524 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admission_location', 'admission_type', 'admittime', 'discharge_location', 'dischtime', 'edouttime', 'edregtime', 'hadm_id', 'insurance', 'language', 'marital_status', 'race', 'subject_id']\n", + "2024-12-14 16:07:05.524 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admission_location, admission_type, admittime, discharge_location, dischtime, edouttime, 
edregtime, hadm_id, insurance, language, marital_status, race, subject_id\n", + "2024-12-14 16:07:05.524 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:05.525 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 16:07:05.528 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005136\n", + "2024-12-14 16:07:05.528 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/.[0-275).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T16:07:05.523672.json\n", + "2024-12-14 16:07:05.531 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents.\n", + "2024-12-14 16:07:05.531 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv to determine row count.\n", + "2024-12-14 16:07:05.534 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.534 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valueuom']\n", + 
"2024-12-14 16:07:05.544 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valueuom\n", + "2024-12-14 16:07:05.546 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 9362 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv.\n", + "2024-12-14 16:07:05.546 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 16:07:05.546 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv row-chunk [0-9362) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet.\n", + "2024-12-14 16:07:05.548 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.548505. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:05.548 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv\n", + "2024-12-14 16:07:05.548 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.549 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valueuom']\n", + "2024-12-14 16:07:05.559 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valueuom\n", + "2024-12-14 16:07:05.559 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:05.560 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:05.583 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.034846\n", + "2024-12-14 16:07:05.583 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T16:07:05.548505.json\n", + "2024-12-14 16:07:05.586 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents.\n", + "2024-12-14 16:07:05.586 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv to determine row count.\n", + "2024-12-14 16:07:05.588 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.588 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['endtime', 'hadm_id', 'itemid', 'starttime', 'stay_id', 'subject_id']\n", + "2024-12-14 16:07:05.592 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, endtime, hadm_id, itemid, starttime, stay_id, subject_id\n", + "2024-12-14 16:07:05.593 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1468 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv.\n", + "2024-12-14 16:07:05.593 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 16:07:05.593 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv row-chunk [0-1468) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet.\n", + "2024-12-14 16:07:05.595 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.595809. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:05.596 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv\n", + "2024-12-14 16:07:05.596 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.596 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['endtime', 'hadm_id', 'itemid', 'starttime', 'stay_id', 'subject_id']\n", + "2024-12-14 16:07:05.600 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, endtime, hadm_id, itemid, starttime, stay_id, subject_id\n", + "2024-12-14 16:07:05.600 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:05.600 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 16:07:05.607 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012174\n", + "2024-12-14 16:07:05.608 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T16:07:05.595809.json\n", + "2024-12-14 16:07:05.610 | INFO | 
MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients.\n", + "2024-12-14 16:07:05.610 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet to determine row count.\n", + "2024-12-14 16:07:05.612 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 16:07:05.612 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 16:07:05.612 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dod', 'gender', 'subject_id', 'year_of_birth']\n", + "2024-12-14 16:07:05.614 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dod, gender, subject_id, year_of_birth\n", + "2024-12-14 16:07:05.617 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 100 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet.\n", + "2024-12-14 16:07:05.617 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 16:07:05.617 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet row-chunk [0-100) to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet.\n", + "2024-12-14 16:07:05.620 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.619958. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:05.620 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet\n", + "2024-12-14 16:07:05.620 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 16:07:05.620 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 16:07:05.620 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dod', 'gender', 'subject_id', 'year_of_birth']\n", + "2024-12-14 16:07:05.620 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dod, gender, subject_id, year_of_birth\n", + "2024-12-14 16:07:05.620 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:05.620 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 16:07:05.622 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.002234\n", + "2024-12-14 16:07:05.622 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/.[0-100).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T16:07:05.619958.json\n", + "2024-12-14 16:07:05.624 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers.\n", + "2024-12-14 16:07:05.624 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv to determine row count.\n", + "2024-12-14 16:07:05.626 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.626 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['careunit', 'eventtype', 'hadm_id', 'intime', 'subject_id']\n", + "2024-12-14 16:07:05.628 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, careunit, eventtype, hadm_id, intime, subject_id\n", + "2024-12-14 16:07:05.628 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1190 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv.\n", + "2024-12-14 16:07:05.628 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 16:07:05.628 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv row-chunk [0-1190) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet.\n", + "2024-12-14 16:07:05.631 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.631167. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:05.631 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv\n", + "2024-12-14 16:07:05.631 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.631 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['careunit', 'eventtype', 'hadm_id', 'intime', 'subject_id']\n", + "2024-12-14 16:07:05.632 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, careunit, eventtype, hadm_id, intime, subject_id\n", + "2024-12-14 16:07:05.632 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:05.633 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 16:07:05.636 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005445\n", + "2024-12-14 16:07:05.636 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/.[0-1190).parquet_cache, but clearing 
lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T16:07:05.631167.json\n", + "2024-12-14 16:07:05.639 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd.\n", + "2024-12-14 16:07:05.639 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv to determine row count.\n", + "2024-12-14 16:07:05.641 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.641 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", + "2024-12-14 16:07:05.642 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, icd_code, icd_version, subject_id\n", + "2024-12-14 16:07:05.642 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 722 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv.\n", + "2024-12-14 16:07:05.642 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 16:07:05.642 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv row-chunk [0-722) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet.\n", + "2024-12-14 16:07:05.645 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.644982. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:05.645 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv\n", + "2024-12-14 16:07:05.645 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.645 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", + "2024-12-14 16:07:05.646 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, icd_code, icd_version, subject_id\n", + "2024-12-14 16:07:05.646 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:05.646 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 16:07:05.648 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003102\n", + "2024-12-14 16:07:05.648 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T16:07:05.644982.json\n", + "2024-12-14 16:07:05.650 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes.\n", + "2024-12-14 16:07:05.650 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet to determine row count.\n", + "2024-12-14 16:07:05.652 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 16:07:05.652 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 16:07:05.652 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['description', 'drg_code', 'drg_mortality', 'drg_severity', 'drg_type', 'hadm_discharge_time', 'hadm_id', 'subject_id']\n", + "2024-12-14 16:07:05.653 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, description, drg_code, drg_mortality, drg_severity, drg_type, hadm_discharge_time, hadm_id, subject_id\n", + "2024-12-14 16:07:05.653 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 454 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet.\n", + "2024-12-14 16:07:05.653 | INFO | 
MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 16:07:05.653 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet row-chunk [0-454) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet.\n", + "2024-12-14 16:07:05.655 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.655772. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:05.656 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet\n", + "2024-12-14 16:07:05.656 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 16:07:05.656 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 16:07:05.656 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['description', 'drg_code', 'drg_mortality', 'drg_severity', 'drg_type', 'hadm_discharge_time', 'hadm_id', 'subject_id']\n", + "2024-12-14 16:07:05.656 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, description, drg_code, drg_mortality, drg_severity, drg_type, hadm_discharge_time, hadm_id, subject_id\n", + "2024-12-14 16:07:05.656 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:05.656 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 16:07:05.658 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.002542\n", + "2024-12-14 16:07:05.658 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T16:07:05.655772.json\n", + "2024-12-14 16:07:05.660 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy.\n", + "2024-12-14 16:07:05.661 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv to determine row count.\n", + "2024-12-14 16:07:05.663 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.663 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['doses_per_24_hrs', 'frequency', 'medication', 'poe_id', 'route', 'starttime', 'stoptime', 'subject_id']\n", + "2024-12-14 16:07:05.697 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, doses_per_24_hrs, frequency, medication, poe_id, route, starttime, stoptime, 
subject_id\n", + "2024-12-14 16:07:05.699 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 15306 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv.\n", + "2024-12-14 16:07:05.699 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 16:07:05.699 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv row-chunk [0-15306) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet.\n", + "2024-12-14 16:07:05.702 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.702392. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:05.702 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv\n", + "2024-12-14 16:07:05.702 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.702 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['doses_per_24_hrs', 'frequency', 'medication', 'poe_id', 'route', 'starttime', 'stoptime', 'subject_id']\n", + "2024-12-14 16:07:05.737 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, doses_per_24_hrs, frequency, medication, poe_id, route, starttime, stoptime, subject_id\n", + "2024-12-14 16:07:05.737 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:05.737 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 16:07:05.765 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.063267\n", + "2024-12-14 16:07:05.765 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T16:07:05.702392.json\n", + "2024-12-14 16:07:05.768 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd.\n", + "2024-12-14 16:07:05.768 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet to determine row count.\n", + "2024-12-14 16:07:05.770 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 16:07:05.770 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 16:07:05.770 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['hadm_discharge_time', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", + 
"2024-12-14 16:07:05.770 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, hadm_discharge_time, hadm_id, icd_code, icd_version, subject_id\n", + "2024-12-14 16:07:05.771 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 4506 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet.\n", + "2024-12-14 16:07:05.771 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 16:07:05.771 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet row-chunk [0-4506) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet.\n", + "2024-12-14 16:07:05.773 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.773633. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:05.773 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet\n", + "2024-12-14 16:07:05.773 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 16:07:05.774 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 16:07:05.774 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['hadm_discharge_time', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", + "2024-12-14 16:07:05.774 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, hadm_discharge_time, hadm_id, icd_code, icd_version, subject_id\n", + "2024-12-14 16:07:05.774 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:05.774 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 16:07:05.776 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003029\n", + "2024-12-14 16:07:05.776 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T16:07:05.773633.json\n", + "2024-12-14 16:07:05.779 | 
INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents.\n", + "2024-12-14 16:07:05.779 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv to determine row count.\n", + "2024-12-14 16:07:05.781 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.781 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['amount', 'amountuom', 'endtime', 'hadm_id', 'itemid', 'linkorderid', 'ordercategorydescription', 'orderid', 'patientweight', 'rate', 'rateuom', 'starttime', 'statusdescription', 'stay_id', 'subject_id']\n", + "2024-12-14 16:07:05.842 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, amount, amountuom, endtime, hadm_id, itemid, linkorderid, ordercategorydescription, orderid, patientweight, rate, rateuom, starttime, statusdescription, stay_id, subject_id\n", + "2024-12-14 16:07:05.845 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 20404 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv.\n", + "2024-12-14 16:07:05.846 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 16:07:05.846 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 
1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv row-chunk [0-20404) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet.\n", + "2024-12-14 16:07:05.848 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.848754. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:05.849 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv\n", + "2024-12-14 16:07:05.849 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.849 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['amount', 'amountuom', 'endtime', 'hadm_id', 'itemid', 'linkorderid', 'ordercategorydescription', 'orderid', 'patientweight', 'rate', 'rateuom', 'starttime', 'statusdescription', 'stay_id', 'subject_id']\n", + "2024-12-14 16:07:05.910 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, amount, amountuom, endtime, hadm_id, itemid, linkorderid, ordercategorydescription, orderid, patientweight, rate, rateuom, starttime, statusdescription, stay_id, subject_id\n", + "2024-12-14 16:07:05.910 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:05.910 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 16:07:05.940 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - 
Succeeded in 0:00:00.092049\n", + "2024-12-14 16:07:05.940 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T16:07:05.848754.json\n", + "2024-12-14 16:07:05.944 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays.\n", + "2024-12-14 16:07:05.944 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv to determine row count.\n", + "2024-12-14 16:07:05.946 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.947 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['first_careunit', 'hadm_id', 'intime', 'last_careunit', 'outtime', 'stay_id', 'subject_id']\n", + "2024-12-14 16:07:05.947 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, first_careunit, hadm_id, intime, last_careunit, outtime, stay_id, subject_id\n", + "2024-12-14 16:07:05.947 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 140 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv.\n", + "2024-12-14 16:07:05.947 | INFO | 
MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 16:07:05.947 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv row-chunk [0-140) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet.\n", + "2024-12-14 16:07:05.950 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.950272. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:05.950 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv\n", + "2024-12-14 16:07:05.950 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.950 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['first_careunit', 'hadm_id', 'intime', 'last_careunit', 'outtime', 'stay_id', 'subject_id']\n", + "2024-12-14 16:07:05.951 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, first_careunit, hadm_id, intime, last_careunit, outtime, stay_id, subject_id\n", + "2024-12-14 16:07:05.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:05.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 16:07:05.953 | 
INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.002913\n", + "2024-12-14 16:07:05.953 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T16:07:05.950272.json\n", + "2024-12-14 16:07:05.955 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents.\n", + "2024-12-14 16:07:05.955 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv to determine row count.\n", + "2024-12-14 16:07:05.957 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.958 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'short_description', 'subject_id']\n", + "2024-12-14 16:07:05.958 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, short_description, subject_id\n", + "2024-12-14 16:07:05.958 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 61 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv.\n", + "2024-12-14 16:07:05.958 | INFO | 
MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 16:07:05.958 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv row-chunk [0-61) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet.\n", + "2024-12-14 16:07:05.960 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.960658. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:05.960 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv\n", + "2024-12-14 16:07:05.961 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.961 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'short_description', 'subject_id']\n", + "2024-12-14 16:07:05.961 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, short_description, subject_id\n", + "2024-12-14 16:07:05.961 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:05.961 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 16:07:05.962 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.001616\n", + "2024-12-14 16:07:05.962 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T16:07:05.960658.json\n", + "2024-12-14 16:07:05.964 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr.\n", + "2024-12-14 16:07:05.965 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv to determine row count.\n", + "2024-12-14 16:07:05.967 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.967 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'result_name', 'result_value', 'subject_id']\n", + "2024-12-14 16:07:05.968 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, result_name, result_value, subject_id\n", + "2024-12-14 16:07:05.969 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 2964 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv.\n", + "2024-12-14 16:07:05.969 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 16:07:05.969 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv row-chunk [0-2964) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet.\n", + "2024-12-14 16:07:05.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.971901. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:05.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv\n", + "2024-12-14 16:07:05.972 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.972 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'result_name', 'result_value', 'subject_id']\n", + "2024-12-14 16:07:05.974 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, result_name, result_value, subject_id\n", + "2024-12-14 16:07:05.974 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:05.974 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 16:07:05.978 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006211\n", + "2024-12-14 16:07:05.978 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T16:07:05.971901.json\n", + "2024-12-14 16:07:05.980 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents.\n", + "2024-12-14 16:07:05.980 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv to determine row count.\n", + "2024-12-14 16:07:05.982 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:05.983 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'priority', 'subject_id', 'value', 'valuenum', 'valueuom']\n", + "2024-12-14 16:07:06.155 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, priority, subject_id, value, valuenum, valueuom\n", + "2024-12-14 16:07:06.161 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 107727 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv.\n", + "2024-12-14 16:07:06.161 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 16:07:06.161 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv row-chunk [0-107727) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet.\n", + "2024-12-14 16:07:06.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:06.164674. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:06.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv\n", + "2024-12-14 16:07:06.165 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv as CSV with kwargs:\n", + " * infer_schema_length=999999999.\n", + "2024-12-14 16:07:06.165 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'priority', 'subject_id', 'value', 'valuenum', 'valueuom']\n", + "2024-12-14 16:07:06.337 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, priority, subject_id, value, valuenum, valueuom\n", + "2024-12-14 16:07:06.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:06.338 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 16:07:06.384 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.220152\n", + "2024-12-14 16:07:06.384 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T16:07:06.164674.json\n", + "2024-12-14 16:07:06.385 | INFO | MEDS_transforms.extract.shard_events:main:430 - Sub-sharding completed in 0:00:02.594945\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:06.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: split_and_shard_subjects\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:06.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-split_and_shard_subjects --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=split_and_shard_subjects\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:07.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:07.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 16:07:07.037 | INFO | MEDS_transforms.utils:stage_init:73 - Running split_and_shard_subjects with the following configuration:\n", + "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", + "cohort_dir: 
${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- shard_events\n", + "- split_and_shard_subjects\n", + "- convert_to_sharded_events\n", + "- merge_to_MEDS_cohort\n", + "- extract_code_metadata\n", + "- finalize_MEDS_metadata\n", + "- finalize_MEDS_data\n", + "stage_configs:\n", + " shard_events:\n", + " row_chunksize: 200000000\n", + " infer_schema_length: 999999999\n", + " data_input_dir: ${input_dir}\n", + " split_and_shard_subjects:\n", + " is_metadata: true\n", + " output_dir: ${cohort_dir}/metadata\n", + " n_subjects_per_shard: 1000\n", + " external_splits_json_fp: null\n", + " split_fracs:\n", + " train: 0.8\n", + " tuning: 0.1\n", + " held_out: 0.1\n", + " convert_to_sharded_events:\n", + " do_dedup_text_and_numeric: true\n", + " merge_to_MEDS_cohort:\n", + " unique_by: '*'\n", + " additional_sort_by: null\n", + " extract_code_metadata:\n", + " is_metadata: true\n", + " description_separator: '\n", + "\n", + " '\n", + " finalize_MEDS_metadata:\n", + " is_metadata: true\n", + " do_retype: true\n", + " finalize_MEDS_data:\n", + " do_retype: true\n", + " split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: split_and_shard_subjects\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: MIMIC-IV\n", + " dataset_version: 2.2\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: extract\n", + "description: \"This pipeline extracts the MIMIC-IV dataset in longitudinal, sparse\\\n", + " \\ form from an input 
dataset meeting\\nselect criteria and converts them to the flattened,\\\n", + " \\ MEDS format. You can control the key arguments to this\\npipeline by setting environment\\\n", + " \\ variables:\\n```bash\\n export EVENT_CONVERSION_CONFIG_FP=# Path to your event\\\n", + " \\ conversion config\\n export MIMICIV_PRE_MEDS_DIR=# Path to the output dir of the\\\n", + " \\ pre-MEDS step\\n export MIMICIV_MEDS_COHORT_DIR=# Path to where you want the dataset\\\n", + " \\ to live\\n```\"\n", + "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", + "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", + "\n", + "2024-12-14 16:07:07.051 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "is_metadata: true\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//metadata\n", + "n_subjects_per_shard: 1000\n", + "external_splits_json_fp: null\n", + "split_fracs:\n", + " train: 0.8\n", + " tuning: 0.1\n", + " held_out: 0.1\n", + "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", + "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/metadata\n", + "reducer_output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", + "train_only: true\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", + " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", + " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/metadata\n", + "2024-12-14 16:07:07.051 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:215 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml (needed 
for subject ID columns)\n", + "2024-12-14 16:07:07.082 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:219 - Event conversion config:\n", + "subject_id_col: subject_id\n", + "hosp/admissions:\n", + " ed_registration:\n", + " code: ED_REGISTRATION\n", + " time: col(edregtime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " ed_out:\n", + " code: ED_OUT\n", + " time: col(edouttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " admission:\n", + " code:\n", + " - HOSPITAL_ADMISSION\n", + " - col(admission_type)\n", + " - col(admission_location)\n", + " time: col(admittime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " insurance: insurance\n", + " language: language\n", + " marital_status: marital_status\n", + " race: race\n", + " hadm_id: hadm_id\n", + " discharge:\n", + " code:\n", + " - HOSPITAL_DISCHARGE\n", + " - col(discharge_location)\n", + " time: col(dischtime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + "hosp/diagnoses_icd:\n", + " diagnosis:\n", + " code:\n", + " - DIAGNOSIS\n", + " - ICD\n", + " - col(icd_version)\n", + " - col(icd_code)\n", + " hadm_id: hadm_id\n", + " time: col(hadm_discharge_time)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " _metadata:\n", + " hosp/d_icd_diagnoses:\n", + " description: long_title\n", + " parent_codes: ICD{icd_version}CM/{norm_icd_code}\n", + "hosp/drgcodes:\n", + " drg:\n", + " code:\n", + " - DRG\n", + " - col(drg_type)\n", + " - col(drg_code)\n", + " - col(description)\n", + " hadm_id: hadm_id\n", + " time: col(hadm_discharge_time)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " drg_severity: drg_severity\n", + " drg_mortality: drg_mortality\n", + "hosp/emar:\n", + " medication:\n", + " code:\n", + " - MEDICATION\n", + " - col(medication)\n", + " - col(event_txt)\n", + " time: col(charttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " emar_id: emar_id\n", + " emar_seq: emar_seq\n", + "hosp/hcpcsevents:\n", + " hcpcs:\n", + " code:\n", + 
" - HCPCS\n", + " - col(short_description)\n", + " hadm_id: hadm_id\n", + " time: col(chartdate)\n", + " time_format: '%Y-%m-%d'\n", + " _metadata:\n", + " hosp/d_hcpcs:\n", + " description: long_description\n", + " possibly_cpt_code: code\n", + "hosp/labevents:\n", + " lab:\n", + " code:\n", + " - LAB\n", + " - col(itemid)\n", + " - col(valueuom)\n", + " hadm_id: hadm_id\n", + " time: col(charttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " numeric_value: valuenum\n", + " text_value: value\n", + " priority: priority\n", + " _metadata:\n", + " d_labitems_to_loinc:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " valueuom: valueuom\n", + "hosp/omr:\n", + " omr:\n", + " code: col(result_name)\n", + " text_value: col(result_value)\n", + " time: col(chartdate)\n", + " time_format: '%Y-%m-%d'\n", + "hosp/patients:\n", + " gender:\n", + " code:\n", + " - GENDER\n", + " - col(gender)\n", + " time: null\n", + " dob:\n", + " code: MEDS_BIRTH\n", + " time: col(year_of_birth)\n", + " time_format: '%Y'\n", + " death:\n", + " code: MEDS_DEATH\n", + " time: col(dod)\n", + " time_format:\n", + " - '%Y-%m-%d %H:%M:%S'\n", + " - '%Y-%m-%d'\n", + "hosp/pharmacy:\n", + " medication_start:\n", + " code:\n", + " - MEDICATION\n", + " - START\n", + " - col(medication)\n", + " time: col(starttime)\n", + " route: route\n", + " frequency: frequency\n", + " doses_per_24_hrs: doses_per_24_hrs\n", + " poe_id: poe_id\n", + " time_format:\n", + " - '%Y-%m-%d %H:%M:%S'\n", + " - '%Y-%m-%d'\n", + " medication_stop:\n", + " code:\n", + " - MEDICATION\n", + " - STOP\n", + " - col(medication)\n", + " time: col(stoptime)\n", + " poe_id: poe_id\n", + " time_format:\n", + " - '%Y-%m-%d %H:%M:%S'\n", + " - '%Y-%m-%d'\n", + "hosp/procedures_icd:\n", + " procedure:\n", + " code:\n", + " - PROCEDURE\n", + " - ICD\n", + " - col(icd_version)\n", + " - col(icd_code)\n", + " 
hadm_id: hadm_id\n", + " time: col(chartdate)\n", + " time_format: '%Y-%m-%d'\n", + " _metadata:\n", + " hosp/d_icd_procedures:\n", + " description: long_title\n", + " parent_codes:\n", + " - ICD{icd_version}Proc/{norm_icd_code}:\n", + " icd_version: '9'\n", + " - ICD{icd_version}PCS/{norm_icd_code}:\n", + " icd_version: '10'\n", + "hosp/transfers:\n", + " transfer:\n", + " code:\n", + " - TRANSFER_TO\n", + " - col(eventtype)\n", + " - col(careunit)\n", + " time: col(intime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + "icu/icustays:\n", + " icu_admission:\n", + " code:\n", + " - ICU_ADMISSION\n", + " - col(first_careunit)\n", + " time: col(intime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " icu_discharge:\n", + " code:\n", + " - ICU_DISCHARGE\n", + " - col(last_careunit)\n", + " time: col(outtime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + "icu/chartevents:\n", + " event:\n", + " code:\n", + " - LAB\n", + " - col(itemid)\n", + " - col(valueuom)\n", + " time: col(charttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " numeric_value: valuenum\n", + " text_value: value\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " _metadata:\n", + " d_labitems_to_loinc:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " valueuom: valueuom\n", + "icu/procedureevents:\n", + " start:\n", + " code:\n", + " - PROCEDURE\n", + " - START\n", + " - col(itemid)\n", + " time: col(starttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " _metadata:\n", + " proc_datetimeevents:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " 
proc_itemid:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " end:\n", + " code:\n", + " - PROCEDURE\n", + " - END\n", + " - col(itemid)\n", + " time: col(endtime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " _metadata:\n", + " proc_datetimeevents:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " proc_itemid:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + "icu/inputevents:\n", + " input_start:\n", + " code:\n", + " - INFUSION_START\n", + " - col(itemid)\n", + " time: col(starttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " order_id: orderid\n", + " link_order_id: linkorderid\n", + " numeric_value: rate\n", + " unit: rateuom\n", + " ordercategorydescription: ordercategorydescription\n", + " _metadata:\n", + " inputevents_to_rxnorm:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " input_end:\n", + " code:\n", + " - INFUSION_END\n", + " - col(itemid)\n", + " time: col(endtime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " order_id: orderid\n", + " link_order_id: linkorderid\n", + " numeric_value: amount\n", + " ordercategorydescription: ordercategorydescription\n", + " statusdescription: statusdescription\n", + " unit: amountuom\n", + " _metadata:\n", + " inputevents_to_rxnorm:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid 
(omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " subject_weight:\n", + " code:\n", + " - SUBJECT_WEIGHT_AT_INFUSION\n", + " - KG\n", + " time: col(starttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " numeric_value: patientweight\n", + "icu/outputevents:\n", + " output:\n", + " code:\n", + " - SUBJECT_FLUID_OUTPUT\n", + " - col(itemid)\n", + " - col(valueuom)\n", + " time: col(charttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " numeric_value: value\n", + " _metadata:\n", + " outputevents_to_loinc:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " valueuom: unitname\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + "\n", + "2024-12-14 16:07:07.083 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/admissions files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 16:07:07.084 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/diagnoses_icd files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 16:07:07.084 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/drgcodes files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 16:07:07.084 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/emar files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 16:07:07.085 | INFO | 
MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/hcpcsevents files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 16:07:07.085 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/labevents files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 16:07:07.085 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/omr files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 16:07:07.085 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/patients files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 16:07:07.085 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/pharmacy files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 16:07:07.086 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/procedures_icd files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 16:07:07.086 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/transfers files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 16:07:07.086 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from 
icu/icustays files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 16:07:07.086 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/chartevents files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 16:07:07.086 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/procedureevents files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 16:07:07.087 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/inputevents files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 16:07:07.087 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/outputevents files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:07.087 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:239 - Joining all subject IDs from 16 dataframes\n", + "2024-12-14 16:07:07.125 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:247 - Found 100 unique subject IDs of type int64\n", + "2024-12-14 16:07:07.128 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:262 - Sharding and splitting subjects\n", + "2024-12-14 16:07:07.166 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split train/0 has 80 subjects.\n", + "2024-12-14 16:07:07.166 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split tuning/0 has 10 subjects.\n", + "2024-12-14 16:07:07.166 | INFO | 
MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split held_out/0 has 10 subjects.\n", + "2024-12-14 16:07:07.166 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:273 - Writing sharded subjects to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/.shards.json\n", + "2024-12-14 16:07:07.167 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:276 - Done writing sharded subjects\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:07.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: convert_to_sharded_events\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:07.221\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-convert_to_sharded_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=convert_to_sharded_events\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:08.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:08.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 16:07:07.767 | INFO | MEDS_transforms.utils:stage_init:73 - Running convert_to_sharded_events with the following configuration:\n", + "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", + "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- shard_events\n", + "- split_and_shard_subjects\n", + "- convert_to_sharded_events\n", + "- merge_to_MEDS_cohort\n", + "- extract_code_metadata\n", + "- finalize_MEDS_metadata\n", + "- finalize_MEDS_data\n", + "stage_configs:\n", + " shard_events:\n", + " row_chunksize: 200000000\n", + " infer_schema_length: 999999999\n", + " data_input_dir: ${input_dir}\n", + " split_and_shard_subjects:\n", + " is_metadata: true\n", + " output_dir: ${cohort_dir}/metadata\n", + " n_subjects_per_shard: 1000\n", + " external_splits_json_fp: null\n", + " split_fracs:\n", + " train: 0.8\n", + " tuning: 0.1\n", + " held_out: 0.1\n", + " convert_to_sharded_events:\n", + " do_dedup_text_and_numeric: true\n", + " merge_to_MEDS_cohort:\n", + " unique_by: '*'\n", + " additional_sort_by: null\n", + " extract_code_metadata:\n", + " is_metadata: true\n", + " description_separator: '\n", + "\n", + " '\n", + " finalize_MEDS_metadata:\n", + " is_metadata: true\n", + " do_retype: true\n", + " finalize_MEDS_data:\n", + " do_retype: true\n", + " split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: convert_to_sharded_events\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: MIMIC-IV\n", + " dataset_version: 2.2\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: extract\n", + "description: \"This pipeline extracts the MIMIC-IV dataset in longitudinal, sparse\\\n", + " \\ form from an input dataset meeting\\nselect criteria and converts them to the flattened,\\\n", + " \\ MEDS 
format. You can control the key arguments to this\\npipeline by setting environment\\\n", + " \\ variables:\\n```bash\\n export EVENT_CONVERSION_CONFIG_FP=# Path to your event\\\n", + " \\ conversion config\\n export MIMICIV_PRE_MEDS_DIR=# Path to the output dir of the\\\n", + " \\ pre-MEDS step\\n export MIMICIV_MEDS_COHORT_DIR=# Path to where you want the dataset\\\n", + " \\ to live\\n```\"\n", + "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", + "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", + "\n", + "2024-12-14 16:07:07.780 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "do_dedup_text_and_numeric: true\n", + "is_metadata: false\n", + "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", + "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events\n", + "reducer_output_dir: null\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", + " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events\n", + " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", + "2024-12-14 16:07:07.781 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:769 - Starting event conversion.\n", + "2024-12-14 16:07:07.781 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:771 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml\n", + "2024-12-14 16:07:07.811 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:773 - Event conversion config:\n", + 
"subject_id_col: subject_id\n", + "hosp/admissions:\n", + " ed_registration:\n", + " code: ED_REGISTRATION\n", + " time: col(edregtime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " ed_out:\n", + " code: ED_OUT\n", + " time: col(edouttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " admission:\n", + " code:\n", + " - HOSPITAL_ADMISSION\n", + " - col(admission_type)\n", + " - col(admission_location)\n", + " time: col(admittime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " insurance: insurance\n", + " language: language\n", + " marital_status: marital_status\n", + " race: race\n", + " hadm_id: hadm_id\n", + " discharge:\n", + " code:\n", + " - HOSPITAL_DISCHARGE\n", + " - col(discharge_location)\n", + " time: col(dischtime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + "hosp/diagnoses_icd:\n", + " diagnosis:\n", + " code:\n", + " - DIAGNOSIS\n", + " - ICD\n", + " - col(icd_version)\n", + " - col(icd_code)\n", + " hadm_id: hadm_id\n", + " time: col(hadm_discharge_time)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " _metadata:\n", + " hosp/d_icd_diagnoses:\n", + " description: long_title\n", + " parent_codes: ICD{icd_version}CM/{norm_icd_code}\n", + "hosp/drgcodes:\n", + " drg:\n", + " code:\n", + " - DRG\n", + " - col(drg_type)\n", + " - col(drg_code)\n", + " - col(description)\n", + " hadm_id: hadm_id\n", + " time: col(hadm_discharge_time)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " drg_severity: drg_severity\n", + " drg_mortality: drg_mortality\n", + "hosp/emar:\n", + " medication:\n", + " code:\n", + " - MEDICATION\n", + " - col(medication)\n", + " - col(event_txt)\n", + " time: col(charttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " emar_id: emar_id\n", + " emar_seq: emar_seq\n", + "hosp/hcpcsevents:\n", + " hcpcs:\n", + " code:\n", + " - HCPCS\n", + " - col(short_description)\n", + " hadm_id: hadm_id\n", + " time: col(chartdate)\n", + " time_format: '%Y-%m-%d'\n", + " _metadata:\n", + " 
hosp/d_hcpcs:\n", + " description: long_description\n", + " possibly_cpt_code: code\n", + "hosp/labevents:\n", + " lab:\n", + " code:\n", + " - LAB\n", + " - col(itemid)\n", + " - col(valueuom)\n", + " hadm_id: hadm_id\n", + " time: col(charttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " numeric_value: valuenum\n", + " text_value: value\n", + " priority: priority\n", + " _metadata:\n", + " d_labitems_to_loinc:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " valueuom: valueuom\n", + "hosp/omr:\n", + " omr:\n", + " code: col(result_name)\n", + " text_value: col(result_value)\n", + " time: col(chartdate)\n", + " time_format: '%Y-%m-%d'\n", + "hosp/patients:\n", + " gender:\n", + " code:\n", + " - GENDER\n", + " - col(gender)\n", + " time: null\n", + " dob:\n", + " code: MEDS_BIRTH\n", + " time: col(year_of_birth)\n", + " time_format: '%Y'\n", + " death:\n", + " code: MEDS_DEATH\n", + " time: col(dod)\n", + " time_format:\n", + " - '%Y-%m-%d %H:%M:%S'\n", + " - '%Y-%m-%d'\n", + "hosp/pharmacy:\n", + " medication_start:\n", + " code:\n", + " - MEDICATION\n", + " - START\n", + " - col(medication)\n", + " time: col(starttime)\n", + " route: route\n", + " frequency: frequency\n", + " doses_per_24_hrs: doses_per_24_hrs\n", + " poe_id: poe_id\n", + " time_format:\n", + " - '%Y-%m-%d %H:%M:%S'\n", + " - '%Y-%m-%d'\n", + " medication_stop:\n", + " code:\n", + " - MEDICATION\n", + " - STOP\n", + " - col(medication)\n", + " time: col(stoptime)\n", + " poe_id: poe_id\n", + " time_format:\n", + " - '%Y-%m-%d %H:%M:%S'\n", + " - '%Y-%m-%d'\n", + "hosp/procedures_icd:\n", + " procedure:\n", + " code:\n", + " - PROCEDURE\n", + " - ICD\n", + " - col(icd_version)\n", + " - col(icd_code)\n", + " hadm_id: hadm_id\n", + " time: col(chartdate)\n", + " time_format: '%Y-%m-%d'\n", + " _metadata:\n", + " hosp/d_icd_procedures:\n", + " description: 
long_title\n", + " parent_codes:\n", + " - ICD{icd_version}Proc/{norm_icd_code}:\n", + " icd_version: '9'\n", + " - ICD{icd_version}PCS/{norm_icd_code}:\n", + " icd_version: '10'\n", + "hosp/transfers:\n", + " transfer:\n", + " code:\n", + " - TRANSFER_TO\n", + " - col(eventtype)\n", + " - col(careunit)\n", + " time: col(intime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + "icu/icustays:\n", + " icu_admission:\n", + " code:\n", + " - ICU_ADMISSION\n", + " - col(first_careunit)\n", + " time: col(intime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " icu_discharge:\n", + " code:\n", + " - ICU_DISCHARGE\n", + " - col(last_careunit)\n", + " time: col(outtime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + "icu/chartevents:\n", + " event:\n", + " code:\n", + " - LAB\n", + " - col(itemid)\n", + " - col(valueuom)\n", + " time: col(charttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " numeric_value: valuenum\n", + " text_value: value\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " _metadata:\n", + " d_labitems_to_loinc:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " valueuom: valueuom\n", + "icu/procedureevents:\n", + " start:\n", + " code:\n", + " - PROCEDURE\n", + " - START\n", + " - col(itemid)\n", + " time: col(starttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " _metadata:\n", + " proc_datetimeevents:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " proc_itemid:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: 
'{omop_vocabulary_id}/{omop_concept_code}'\n", + " end:\n", + " code:\n", + " - PROCEDURE\n", + " - END\n", + " - col(itemid)\n", + " time: col(endtime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " _metadata:\n", + " proc_datetimeevents:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " proc_itemid:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + "icu/inputevents:\n", + " input_start:\n", + " code:\n", + " - INFUSION_START\n", + " - col(itemid)\n", + " time: col(starttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " order_id: orderid\n", + " link_order_id: linkorderid\n", + " numeric_value: rate\n", + " unit: rateuom\n", + " ordercategorydescription: ordercategorydescription\n", + " _metadata:\n", + " inputevents_to_rxnorm:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " input_end:\n", + " code:\n", + " - INFUSION_END\n", + " - col(itemid)\n", + " time: col(endtime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " order_id: orderid\n", + " link_order_id: linkorderid\n", + " numeric_value: amount\n", + " ordercategorydescription: ordercategorydescription\n", + " statusdescription: statusdescription\n", + " unit: amountuom\n", + " _metadata:\n", + " inputevents_to_rxnorm:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " subject_weight:\n", + " code:\n", + " - SUBJECT_WEIGHT_AT_INFUSION\n", 
+ " - KG\n", + " time: col(starttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " numeric_value: patientweight\n", + "icu/outputevents:\n", + " output:\n", + " code:\n", + " - SUBJECT_FLUID_OUTPUT\n", + " - col(itemid)\n", + " - col(valueuom)\n", + " time: col(charttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " numeric_value: value\n", + " _metadata:\n", + " outputevents_to_loinc:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " valueuom: unitname\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + "\n", + "2024-12-14 16:07:07.820 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 16:07:07.821 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.821527. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:07.822 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 16:07:07.823 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:802: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " typed_subjects = pl.Series(subjects, dtype=df.schema[input_subject_id_column])\n", + "2024-12-14 16:07:07.829 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", + "2024-12-14 16:07:07.834 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:468: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " if col not in df.schema:\n", + "2024-12-14 16:07:07.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", + "2024-12-14 16:07:07.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", + "2024-12-14 16:07:07.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:513: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " if v not in df.schema:\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:517: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " is_numeric = df.schema[v].is_numeric()\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:518: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " is_str = df.schema[v] == pl.Utf8\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:519: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " is_cat = isinstance(df.schema[v], pl.Categorical)\n", + "2024-12-14 16:07:07.836 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:07.836 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 16:07:07.859 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.037877\n", + "2024-12-14 16:07:07.859 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T16:07:07.821527.json\n", + "2024-12-14 16:07:07.859 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 16:07:07.860 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.860108. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:07.860 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 16:07:07.860 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:07.860 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", + "2024-12-14 16:07:07.864 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", + "2024-12-14 16:07:07.864 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", + "2024-12-14 16:07:07.864 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 16:07:07.865 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source column 'col(result_value)' for event column text_value is always interpreted as a column name. Removing col() function call and setting source column to result_value.\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:529: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " case \"text_value\" if not df.schema[v] == pl.Utf8:\n", + "2024-12-14 16:07:07.865 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", + "2024-12-14 16:07:07.865 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:07.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 16:07:07.868 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008277\n", + "2024-12-14 16:07:07.868 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T16:07:07.860108.json\n", + "2024-12-14 16:07:07.869 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 16:07:07.870 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.869972. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:07.870 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 16:07:07.870 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:07.870 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 16:07:07.874 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", + "2024-12-14 16:07:07.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:07.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:07.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:07.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", + "2024-12-14 16:07:07.876 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:07.876 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:07.876 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:07.876 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 16:07:07.883 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013825\n", + "2024-12-14 16:07:07.883 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T16:07:07.869972.json\n", + "2024-12-14 16:07:07.884 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 16:07:07.884 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.884587. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:07.884 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 16:07:07.884 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:07.885 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 16:07:07.888 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 16:07:07.889 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 16:07:07.889 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 16:07:07.889 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:07.889 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:07.889 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 16:07:07.891 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006845\n", + "2024-12-14 16:07:07.891 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T16:07:07.884587.json\n", + "2024-12-14 16:07:07.891 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 16:07:07.892 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.892220. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:07.892 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 16:07:07.892 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:07.892 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", + "2024-12-14 16:07:07.896 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", + "2024-12-14 16:07:07.896 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 16:07:07.896 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", + "2024-12-14 16:07:07.896 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column 
charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:07.897 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:07.897 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 16:07:07.902 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009786\n", + "2024-12-14 16:07:07.902 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T16:07:07.892220.json\n", + "2024-12-14 16:07:07.902 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 16:07:07.903 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.903054. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:07.903 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 16:07:07.903 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:07.903 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", + "2024-12-14 16:07:07.907 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", + "2024-12-14 16:07:07.907 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:07.907 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:07.907 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", + "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", + "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
admission_type\n", + "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_location\n", + "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", + "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", + "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 16:07:07.918 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.015170\n", + "2024-12-14 16:07:07.918 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T16:07:07.903054.json\n", + "2024-12-14 16:07:07.918 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 16:07:07.919 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.919077. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:07.919 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 16:07:07.919 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:07.919 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", + "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column 
year_of_birth in possible formats %Y\n", + "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:07.924 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", + "2024-12-14 16:07:07.924 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 16:07:07.924 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 16:07:07.924 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 16:07:07.928 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009003\n", + "2024-12-14 16:07:07.928 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T16:07:07.919077.json\n", + "2024-12-14 16:07:07.928 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 16:07:07.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.928924. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:07.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 16:07:07.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:07.929 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", + "2024-12-14 16:07:07.933 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", + "2024-12-14 16:07:07.934 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 16:07:07.934 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:07.934 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:07.935 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:07.935 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 16:07:07.968 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.039739\n", + "2024-12-14 16:07:07.968 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T16:07:07.928924.json\n", + "2024-12-14 16:07:07.969 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:07.970 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.969832. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:07.970 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:07.970 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:07.970 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:07.974 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", + "2024-12-14 16:07:07.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 16:07:07.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:07.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:07.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:07.975 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:07.978 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008660\n", + "2024-12-14 16:07:07.978 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T16:07:07.969832.json\n", + "2024-12-14 16:07:07.979 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 16:07:07.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.979455. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:07.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 16:07:07.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:07.980 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 16:07:07.983 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", + "2024-12-14 16:07:07.984 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 16:07:07.984 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 16:07:07.984 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 16:07:07.984 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", + "2024-12-14 16:07:07.985 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 16:07:07.985 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 16:07:07.985 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 16:07:07.985 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 16:07:07.991 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011555\n", + "2024-12-14 16:07:07.991 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T16:07:07.979455.json\n", + "2024-12-14 16:07:07.991 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 16:07:07.992 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.991934. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:07.992 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 16:07:07.992 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:07.992 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 16:07:07.996 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", + "2024-12-14 16:07:07.996 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", + "2024-12-14 16:07:07.996 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 16:07:07.996 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:07.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 16:07:07.998 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006670\n", + "2024-12-14 16:07:07.998 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T16:07:07.991934.json\n", + "2024-12-14 16:07:07.999 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 16:07:07.999 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.999367. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:07.999 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 16:07:07.999 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:07.999 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 16:07:08.003 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", + "2024-12-14 16:07:08.003 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", + "2024-12-14 16:07:08.003 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", + "2024-12-14 16:07:08.004 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", + "2024-12-14 16:07:08.004 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.004 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.004 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 16:07:08.006 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006814\n", + "2024-12-14 16:07:08.006 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T16:07:07.999367.json\n", + "2024-12-14 16:07:08.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 16:07:08.007 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.007341. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.007 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 16:07:08.007 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 16:07:08.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", + "2024-12-14 16:07:08.012 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 16:07:08.012 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 16:07:08.012 | INFO 
| MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 16:07:08.012 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.012 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 16:07:08.014 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006829\n", + "2024-12-14 16:07:08.014 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T16:07:08.007341.json\n", + "2024-12-14 16:07:08.014 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 16:07:08.015 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.015034. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.015 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 16:07:08.015 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.015 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", + "2024-12-14 16:07:08.019 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 16:07:08.019 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 16:07:08.019 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.020 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.020 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.020 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 16:07:08.026 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011136\n", + "2024-12-14 16:07:08.026 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T16:07:08.015034.json\n", + "2024-12-14 16:07:08.027 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 16:07:08.027 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.027619. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.027 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 16:07:08.027 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.028 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", + "2024-12-14 16:07:08.032 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", + "2024-12-14 16:07:08.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.033 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", + "2024-12-14 16:07:08.034 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.034 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.035 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.035 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", + "2024-12-14 16:07:08.035 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.035 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.035 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 16:07:08.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016992\n", + "2024-12-14 16:07:08.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T16:07:08.027619.json\n", + "2024-12-14 16:07:08.045 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 16:07:08.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.045586. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 16:07:08.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.046 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", + "2024-12-14 16:07:08.050 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", + "2024-12-14 16:07:08.050 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", + "2024-12-14 16:07:08.050 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.051 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", + "2024-12-14 16:07:08.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", + "2024-12-14 16:07:08.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.051 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 16:07:08.054 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008874\n", + "2024-12-14 16:07:08.054 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T16:07:08.045586.json\n", + "2024-12-14 16:07:08.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 16:07:08.055 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.055324. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.055 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 16:07:08.055 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.055 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", + "2024-12-14 16:07:08.059 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", + "2024-12-14 16:07:08.059 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", + "2024-12-14 16:07:08.059 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", + "2024-12-14 16:07:08.059 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.060 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.060 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 16:07:08.061 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006326\n", + "2024-12-14 16:07:08.061 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T16:07:08.055324.json\n", + "2024-12-14 16:07:08.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 16:07:08.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.062419. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 16:07:08.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", + "2024-12-14 16:07:08.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", + "2024-12-14 16:07:08.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", + "2024-12-14 16:07:08.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 16:07:08.066 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source column 'col(result_value)' for event column text_value is always interpreted as a column name. 
Removing col() function call and setting source column to result_value.\n", + "2024-12-14 16:07:08.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", + "2024-12-14 16:07:08.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.067 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 16:07:08.068 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006074\n", + "2024-12-14 16:07:08.068 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T16:07:08.062419.json\n", + "2024-12-14 16:07:08.069 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 16:07:08.070 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.069888. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.070 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 16:07:08.070 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.070 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 16:07:08.074 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", + "2024-12-14 16:07:08.075 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.075 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.075 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.075 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", + "2024-12-14 16:07:08.076 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.076 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.076 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.076 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 16:07:08.079 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009659\n", + "2024-12-14 16:07:08.079 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T16:07:08.069888.json\n", + "2024-12-14 16:07:08.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 16:07:08.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.080456. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 16:07:08.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 16:07:08.084 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 16:07:08.084 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 16:07:08.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 16:07:08.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.085 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 16:07:08.086 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006417\n", + "2024-12-14 16:07:08.086 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T16:07:08.080456.json\n", + "2024-12-14 16:07:08.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 16:07:08.087 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.087668. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.087 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 16:07:08.087 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", + "2024-12-14 16:07:08.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", + "2024-12-14 16:07:08.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 16:07:08.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", + "2024-12-14 16:07:08.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time 
column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.092 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 16:07:08.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009201\n", + "2024-12-14 16:07:08.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T16:07:08.087668.json\n", + "2024-12-14 16:07:08.097 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 16:07:08.098 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.098026. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.098 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 16:07:08.098 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.098 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", + "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", + "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", + "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", + "2024-12-14 16:07:08.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
admission_type\n", + "2024-12-14 16:07:08.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_location\n", + "2024-12-14 16:07:08.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", + "2024-12-14 16:07:08.104 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", + "2024-12-14 16:07:08.104 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.104 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.104 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 16:07:08.108 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010742\n", + "2024-12-14 16:07:08.108 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T16:07:08.098026.json\n", + "2024-12-14 16:07:08.109 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 16:07:08.109 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.109557. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.109 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 16:07:08.109 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", + "2024-12-14 16:07:08.113 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column 
year_of_birth in possible formats %Y\n", + "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", + "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 16:07:08.117 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008429\n", + "2024-12-14 16:07:08.118 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T16:07:08.109557.json\n", + "2024-12-14 16:07:08.118 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 16:07:08.119 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.118982. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.119 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 16:07:08.119 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.119 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", + "2024-12-14 16:07:08.123 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", + "2024-12-14 16:07:08.124 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 16:07:08.124 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.124 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.124 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 16:07:08.143 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024349\n", + "2024-12-14 16:07:08.143 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T16:07:08.118982.json\n", + "2024-12-14 16:07:08.144 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:08.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.144266. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:08.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.144 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:08.148 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", + "2024-12-14 16:07:08.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 16:07:08.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.149 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:08.152 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008152\n", + "2024-12-14 16:07:08.152 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T16:07:08.144266.json\n", + "2024-12-14 16:07:08.153 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 16:07:08.153 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.153615. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.153 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 16:07:08.153 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.154 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 16:07:08.158 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", + "2024-12-14 16:07:08.158 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 16:07:08.158 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 16:07:08.158 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 16:07:08.158 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", + "2024-12-14 16:07:08.159 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 16:07:08.159 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 16:07:08.159 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 16:07:08.159 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 16:07:08.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010973\n", + "2024-12-14 16:07:08.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T16:07:08.153615.json\n", + "2024-12-14 16:07:08.165 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 16:07:08.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.165366. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 16:07:08.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.165 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 16:07:08.169 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", + "2024-12-14 16:07:08.169 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", + "2024-12-14 16:07:08.169 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 16:07:08.170 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.170 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 16:07:08.171 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005897\n", + "2024-12-14 16:07:08.171 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T16:07:08.165366.json\n", + "2024-12-14 16:07:08.171 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 16:07:08.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.172125. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 16:07:08.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.172 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 16:07:08.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", + "2024-12-14 16:07:08.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", + "2024-12-14 16:07:08.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", + "2024-12-14 16:07:08.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", + "2024-12-14 16:07:08.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.177 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.177 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 16:07:08.178 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006754\n", + "2024-12-14 16:07:08.178 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T16:07:08.172125.json\n", + "2024-12-14 16:07:08.179 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 16:07:08.180 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.179926. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.180 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 16:07:08.180 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.180 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 16:07:08.184 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", + "2024-12-14 16:07:08.184 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 16:07:08.184 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 16:07:08.184 
| INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 16:07:08.185 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.185 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 16:07:08.186 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006649\n", + "2024-12-14 16:07:08.186 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T16:07:08.179926.json\n", + "2024-12-14 16:07:08.187 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 16:07:08.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.187501. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 16:07:08.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.188 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", + "2024-12-14 16:07:08.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 16:07:08.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 16:07:08.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.193 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 16:07:08.197 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010020\n", + "2024-12-14 16:07:08.197 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T16:07:08.187501.json\n", + "2024-12-14 16:07:08.198 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 16:07:08.199 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.198888. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.199 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 16:07:08.199 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.199 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", + "2024-12-14 16:07:08.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", + "2024-12-14 16:07:08.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.204 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", + "2024-12-14 16:07:08.205 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.205 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.206 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.206 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", + "2024-12-14 16:07:08.206 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.206 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.206 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 16:07:08.213 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.014441\n", + "2024-12-14 16:07:08.213 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T16:07:08.198888.json\n", + "2024-12-14 16:07:08.213 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 16:07:08.214 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.214164. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.214 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 16:07:08.214 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.214 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", + "2024-12-14 16:07:08.218 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", + "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", + "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.219 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", + "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", + "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 16:07:08.222 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007988\n", + "2024-12-14 16:07:08.222 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T16:07:08.214164.json\n", + "2024-12-14 16:07:08.222 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 16:07:08.223 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.223318. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.223 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 16:07:08.223 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.223 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", + "2024-12-14 16:07:08.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", + "2024-12-14 16:07:08.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", + "2024-12-14 16:07:08.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", + "2024-12-14 16:07:08.228 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.228 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.228 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 16:07:08.230 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006777\n", + "2024-12-14 16:07:08.230 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T16:07:08.223318.json\n", + "2024-12-14 16:07:08.230 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 16:07:08.230 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.230725. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.230 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 16:07:08.231 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.231 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", + "2024-12-14 16:07:08.234 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", + "2024-12-14 16:07:08.234 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", + "2024-12-14 16:07:08.234 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 16:07:08.235 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source column 'col(result_value)' for event column text_value is always interpreted as a column name. 
Removing col() function call and setting source column to result_value.\n", + "2024-12-14 16:07:08.235 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", + "2024-12-14 16:07:08.235 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.235 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 16:07:08.237 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006615\n", + "2024-12-14 16:07:08.237 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T16:07:08.230725.json\n", + "2024-12-14 16:07:08.238 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 16:07:08.238 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.238859. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.239 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 16:07:08.239 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.239 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 16:07:08.244 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", + "2024-12-14 16:07:08.244 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.244 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.245 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.245 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", + "2024-12-14 16:07:08.245 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.245 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.245 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.245 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 16:07:08.257 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.017998\n", + "2024-12-14 16:07:08.257 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T16:07:08.238859.json\n", + "2024-12-14 16:07:08.260 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 16:07:08.262 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.261546. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.263 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 16:07:08.263 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.264 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 16:07:08.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 16:07:08.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 16:07:08.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 16:07:08.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.281 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 16:07:08.286 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024554\n", + "2024-12-14 16:07:08.286 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T16:07:08.261546.json\n", + "2024-12-14 16:07:08.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 16:07:08.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.287021. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 16:07:08.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", + "2024-12-14 16:07:08.291 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", + "2024-12-14 16:07:08.291 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 16:07:08.291 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", + "2024-12-14 16:07:08.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column 
charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.292 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 16:07:08.310 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.023385\n", + "2024-12-14 16:07:08.310 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T16:07:08.287021.json\n", + "2024-12-14 16:07:08.311 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 16:07:08.311 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.311468. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.311 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 16:07:08.311 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.312 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", + "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", + "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", + "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", + "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
admission_type\n", + "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_location\n", + "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", + "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", + "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.318 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.318 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 16:07:08.323 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012409\n", + "2024-12-14 16:07:08.323 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T16:07:08.311468.json\n", + "2024-12-14 16:07:08.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 16:07:08.324 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.324714. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.324 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 16:07:08.325 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.325 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", + "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth 
in possible formats %Y\n", + "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", + "2024-12-14 16:07:08.330 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 16:07:08.330 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 16:07:08.330 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 16:07:08.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008220\n", + "2024-12-14 16:07:08.333 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T16:07:08.324714.json\n", + "2024-12-14 16:07:08.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 16:07:08.333 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.333867. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.334 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 16:07:08.334 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.334 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", + "2024-12-14 16:07:08.338 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", + "2024-12-14 16:07:08.339 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 16:07:08.339 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.339 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.339 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.339 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 16:07:08.544 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.210303\n", + "2024-12-14 16:07:08.544 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T16:07:08.333867.json\n", + "2024-12-14 16:07:08.544 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:08.545 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.545351. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.545 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:08.545 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.546 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:08.550 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", + "2024-12-14 16:07:08.551 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 16:07:08.551 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.551 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.551 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.551 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:08.556 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011422\n", + "2024-12-14 16:07:08.556 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T16:07:08.545351.json\n", + "2024-12-14 16:07:08.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 16:07:08.558 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.557861. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.558 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 16:07:08.558 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 16:07:08.562 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", + "2024-12-14 16:07:08.562 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 16:07:08.562 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 16:07:08.563 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 16:07:08.563 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", + "2024-12-14 16:07:08.563 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 16:07:08.563 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 16:07:08.564 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 16:07:08.564 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 16:07:08.581 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.023592\n", + "2024-12-14 16:07:08.581 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T16:07:08.557861.json\n", + "2024-12-14 16:07:08.582 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 16:07:08.582 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.582313. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.582 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 16:07:08.582 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.582 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 16:07:08.586 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", + "2024-12-14 16:07:08.587 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", + "2024-12-14 16:07:08.587 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 16:07:08.587 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.587 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 16:07:08.588 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006400\n", + "2024-12-14 16:07:08.588 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T16:07:08.582313.json\n", + "2024-12-14 16:07:08.589 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 16:07:08.589 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.589478. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.589 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 16:07:08.589 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.589 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 16:07:08.593 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", + "2024-12-14 16:07:08.593 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", + "2024-12-14 16:07:08.594 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", + "2024-12-14 16:07:08.594 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", + "2024-12-14 16:07:08.594 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.594 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.594 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 16:07:08.596 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007142\n", + "2024-12-14 16:07:08.596 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T16:07:08.589478.json\n", + "2024-12-14 16:07:08.597 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 16:07:08.597 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.597686. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.597 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 16:07:08.598 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.598 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 16:07:08.602 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", + "2024-12-14 16:07:08.602 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 16:07:08.602 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 16:07:08.602 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 16:07:08.602 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.603 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 16:07:08.605 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007402\n", + "2024-12-14 16:07:08.605 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T16:07:08.597686.json\n", + "2024-12-14 16:07:08.605 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 16:07:08.606 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.606130. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.606 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 16:07:08.606 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.606 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", + "2024-12-14 16:07:08.610 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 16:07:08.611 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 16:07:08.611 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.611 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.611 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.611 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 16:07:08.642 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.036436\n", + "2024-12-14 16:07:08.642 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T16:07:08.606130.json\n", + "2024-12-14 16:07:08.643 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 16:07:08.644 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.644292. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.644 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 16:07:08.644 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.644 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", + "2024-12-14 16:07:08.649 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", + "2024-12-14 16:07:08.649 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.649 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.650 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.650 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", + "2024-12-14 16:07:08.651 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 16:07:08.651 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.651 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.651 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", + "2024-12-14 16:07:08.652 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.652 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.652 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 16:07:08.676 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.032631\n", + "2024-12-14 16:07:08.677 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T16:07:08.644292.json\n", + "2024-12-14 16:07:08.678 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 16:07:08.678 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.678769. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:08.679 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 16:07:08.679 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:08.679 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", + "2024-12-14 16:07:08.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", + "2024-12-14 16:07:08.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", + "2024-12-14 16:07:08.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.683 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", + "2024-12-14 16:07:08.684 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", + "2024-12-14 16:07:08.684 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 16:07:08.684 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 16:07:08.684 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 16:07:08.686 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008114\n", + "2024-12-14 16:07:08.686 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T16:07:08.678769.json\n", + "2024-12-14 16:07:08.687 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:823 - Subsharded into converted events.\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:08.766\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: merge_to_MEDS_cohort\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:08.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-merge_to_MEDS_cohort --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=merge_to_MEDS_cohort\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:10.205\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:10.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 16:07:09.354 | INFO | MEDS_transforms.utils:stage_init:73 - Running merge_to_MEDS_cohort with the following configuration:\n", + "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", + "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- shard_events\n", + "- split_and_shard_subjects\n", + "- convert_to_sharded_events\n", + "- merge_to_MEDS_cohort\n", + "- extract_code_metadata\n", + "- finalize_MEDS_metadata\n", + "- finalize_MEDS_data\n", + "stage_configs:\n", + " shard_events:\n", + " row_chunksize: 200000000\n", + " infer_schema_length: 999999999\n", + " data_input_dir: ${input_dir}\n", + " split_and_shard_subjects:\n", + " is_metadata: true\n", + " output_dir: ${cohort_dir}/metadata\n", + " n_subjects_per_shard: 1000\n", + " external_splits_json_fp: null\n", + " split_fracs:\n", + " train: 0.8\n", + " tuning: 0.1\n", + " held_out: 0.1\n", + " convert_to_sharded_events:\n", + " do_dedup_text_and_numeric: true\n", + " merge_to_MEDS_cohort:\n", + " unique_by: '*'\n", + " additional_sort_by: null\n", + " extract_code_metadata:\n", + " is_metadata: true\n", + " description_separator: '\n", + "\n", + " '\n", + " finalize_MEDS_metadata:\n", + " is_metadata: true\n", + " do_retype: true\n", + " finalize_MEDS_data:\n", + " do_retype: true\n", + " split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: merge_to_MEDS_cohort\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: MIMIC-IV\n", + " dataset_version: 2.2\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: extract\n", + "description: \"This pipeline extracts the MIMIC-IV dataset in longitudinal, sparse\\\n", + " \\ form from an input dataset meeting\\nselect criteria and converts them to the flattened,\\\n", + " \\ MEDS format. 
You can control the key arguments to this\\npipeline by setting environment\\\n", + " \\ variables:\\n```bash\\n export EVENT_CONVERSION_CONFIG_FP=# Path to your event\\\n", + " \\ conversion config\\n export MIMICIV_PRE_MEDS_DIR=# Path to the output dir of the\\\n", + " \\ pre-MEDS step\\n export MIMICIV_MEDS_COHORT_DIR=# Path to where you want the dataset\\\n", + " \\ to live\\n```\"\n", + "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", + "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", + "\n", + "2024-12-14 16:07:09.370 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "unique_by: '*'\n", + "additional_sort_by: null\n", + "is_metadata: false\n", + "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events\n", + "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", + "reducer_output_dir: null\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events\n", + " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", + " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", + "2024-12-14 16:07:09.385 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator_by_shard_map:687 - Mapping computation over a maximum of 3 shards\n", + "2024-12-14 16:07:09.393 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 
16:07:09.394 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:09.393828. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:09.395 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0\n", + "2024-12-14 16:07:09.396 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", + " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:09.405 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:09.405 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 16:07:09.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.066957\n", + "2024-12-14 16:07:09.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache/locks/2024-12-14T16:07:09.393828.json\n", + "2024-12-14 16:07:09.461 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - 
Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 16:07:09.461 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:09.461748. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:09.462 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0\n", + "2024-12-14 16:07:09.463 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", + " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:09.464 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:09.465 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 16:07:09.512 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.050432\n", + "2024-12-14 16:07:09.512 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache/locks/2024-12-14T16:07:09.461748.json\n", + "2024-12-14 16:07:09.512 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 16:07:09.513 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:09.513190. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:09.513 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0\n", + "2024-12-14 16:07:09.514 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", + " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 16:07:09.516 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:09.516 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 16:07:10.123 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.610197\n", + "2024-12-14 16:07:10.123 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - 
Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache/locks/2024-12-14T16:07:09.513190.json\n", + "2024-12-14 16:07:10.123 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.753831\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:10.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: extract_code_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:10.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-extract_code_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=extract_code_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:11.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:11.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 16:07:10.771 | INFO | MEDS_transforms.utils:stage_init:73 - Running extract_code_metadata with the following configuration:\n", + "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", + "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- shard_events\n", + "- split_and_shard_subjects\n", + "- convert_to_sharded_events\n", + "- merge_to_MEDS_cohort\n", + "- extract_code_metadata\n", + "- finalize_MEDS_metadata\n", + "- finalize_MEDS_data\n", + "stage_configs:\n", + " shard_events:\n", + " row_chunksize: 200000000\n", + " infer_schema_length: 999999999\n", + " data_input_dir: ${input_dir}\n", + " split_and_shard_subjects:\n", + " is_metadata: true\n", + " output_dir: ${cohort_dir}/metadata\n", + " n_subjects_per_shard: 1000\n", + " external_splits_json_fp: null\n", + " split_fracs:\n", + " train: 0.8\n", + " tuning: 0.1\n", + " held_out: 0.1\n", + " convert_to_sharded_events:\n", + " do_dedup_text_and_numeric: true\n", + " merge_to_MEDS_cohort:\n", + " unique_by: '*'\n", + " additional_sort_by: null\n", + " extract_code_metadata:\n", + " is_metadata: true\n", + " description_separator: '\n", + "\n", + " '\n", + " finalize_MEDS_metadata:\n", + " is_metadata: true\n", + " do_retype: true\n", + " finalize_MEDS_data:\n", + " do_retype: true\n", + " split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: extract_code_metadata\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: MIMIC-IV\n", + " dataset_version: 2.2\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: extract\n", + "description: \"This pipeline extracts the MIMIC-IV dataset in longitudinal, sparse\\\n", + " \\ form from an input dataset meeting\\nselect criteria and converts them to the flattened,\\\n", + " \\ MEDS format. 
You can control the key arguments to this\\npipeline by setting environment\\\n", + " \\ variables:\\n```bash\\n export EVENT_CONVERSION_CONFIG_FP=# Path to your event\\\n", + " \\ conversion config\\n export MIMICIV_PRE_MEDS_DIR=# Path to the output dir of the\\\n", + " \\ pre-MEDS step\\n export MIMICIV_MEDS_COHORT_DIR=# Path to where you want the dataset\\\n", + " \\ to live\\n```\"\n", + "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", + "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", + "\n", + "2024-12-14 16:07:10.787 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "is_metadata: true\n", + "description_separator: '\n", + "\n", + " '\n", + "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", + "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", + "reducer_output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", + "train_only: true\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", + " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", + " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", + "2024-12-14 16:07:10.788 | INFO | MEDS_transforms.extract.extract_code_metadata:main:359 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml\n", + "2024-12-14 16:07:10.819 | INFO | MEDS_transforms.extract.extract_code_metadata:main:361 - Event conversion config:\n", + "subject_id_col: 
subject_id\n", + "hosp/admissions:\n", + " ed_registration:\n", + " code: ED_REGISTRATION\n", + " time: col(edregtime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " ed_out:\n", + " code: ED_OUT\n", + " time: col(edouttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " admission:\n", + " code:\n", + " - HOSPITAL_ADMISSION\n", + " - col(admission_type)\n", + " - col(admission_location)\n", + " time: col(admittime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " insurance: insurance\n", + " language: language\n", + " marital_status: marital_status\n", + " race: race\n", + " hadm_id: hadm_id\n", + " discharge:\n", + " code:\n", + " - HOSPITAL_DISCHARGE\n", + " - col(discharge_location)\n", + " time: col(dischtime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + "hosp/diagnoses_icd:\n", + " diagnosis:\n", + " code:\n", + " - DIAGNOSIS\n", + " - ICD\n", + " - col(icd_version)\n", + " - col(icd_code)\n", + " hadm_id: hadm_id\n", + " time: col(hadm_discharge_time)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " _metadata:\n", + " hosp/d_icd_diagnoses:\n", + " description: long_title\n", + " parent_codes: ICD{icd_version}CM/{norm_icd_code}\n", + "hosp/drgcodes:\n", + " drg:\n", + " code:\n", + " - DRG\n", + " - col(drg_type)\n", + " - col(drg_code)\n", + " - col(description)\n", + " hadm_id: hadm_id\n", + " time: col(hadm_discharge_time)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " drg_severity: drg_severity\n", + " drg_mortality: drg_mortality\n", + "hosp/emar:\n", + " medication:\n", + " code:\n", + " - MEDICATION\n", + " - col(medication)\n", + " - col(event_txt)\n", + " time: col(charttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " emar_id: emar_id\n", + " emar_seq: emar_seq\n", + "hosp/hcpcsevents:\n", + " hcpcs:\n", + " code:\n", + " - HCPCS\n", + " - col(short_description)\n", + " hadm_id: hadm_id\n", + " time: col(chartdate)\n", + " time_format: '%Y-%m-%d'\n", + " _metadata:\n", + " 
hosp/d_hcpcs:\n", + " description: long_description\n", + " possibly_cpt_code: code\n", + "hosp/labevents:\n", + " lab:\n", + " code:\n", + " - LAB\n", + " - col(itemid)\n", + " - col(valueuom)\n", + " hadm_id: hadm_id\n", + " time: col(charttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " numeric_value: valuenum\n", + " text_value: value\n", + " priority: priority\n", + " _metadata:\n", + " d_labitems_to_loinc:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " valueuom: valueuom\n", + "hosp/omr:\n", + " omr:\n", + " code: col(result_name)\n", + " text_value: col(result_value)\n", + " time: col(chartdate)\n", + " time_format: '%Y-%m-%d'\n", + "hosp/patients:\n", + " gender:\n", + " code:\n", + " - GENDER\n", + " - col(gender)\n", + " time: null\n", + " dob:\n", + " code: MEDS_BIRTH\n", + " time: col(year_of_birth)\n", + " time_format: '%Y'\n", + " death:\n", + " code: MEDS_DEATH\n", + " time: col(dod)\n", + " time_format:\n", + " - '%Y-%m-%d %H:%M:%S'\n", + " - '%Y-%m-%d'\n", + "hosp/pharmacy:\n", + " medication_start:\n", + " code:\n", + " - MEDICATION\n", + " - START\n", + " - col(medication)\n", + " time: col(starttime)\n", + " route: route\n", + " frequency: frequency\n", + " doses_per_24_hrs: doses_per_24_hrs\n", + " poe_id: poe_id\n", + " time_format:\n", + " - '%Y-%m-%d %H:%M:%S'\n", + " - '%Y-%m-%d'\n", + " medication_stop:\n", + " code:\n", + " - MEDICATION\n", + " - STOP\n", + " - col(medication)\n", + " time: col(stoptime)\n", + " poe_id: poe_id\n", + " time_format:\n", + " - '%Y-%m-%d %H:%M:%S'\n", + " - '%Y-%m-%d'\n", + "hosp/procedures_icd:\n", + " procedure:\n", + " code:\n", + " - PROCEDURE\n", + " - ICD\n", + " - col(icd_version)\n", + " - col(icd_code)\n", + " hadm_id: hadm_id\n", + " time: col(chartdate)\n", + " time_format: '%Y-%m-%d'\n", + " _metadata:\n", + " hosp/d_icd_procedures:\n", + " description: 
long_title\n", + " parent_codes:\n", + " - ICD{icd_version}Proc/{norm_icd_code}:\n", + " icd_version: '9'\n", + " - ICD{icd_version}PCS/{norm_icd_code}:\n", + " icd_version: '10'\n", + "hosp/transfers:\n", + " transfer:\n", + " code:\n", + " - TRANSFER_TO\n", + " - col(eventtype)\n", + " - col(careunit)\n", + " time: col(intime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + "icu/icustays:\n", + " icu_admission:\n", + " code:\n", + " - ICU_ADMISSION\n", + " - col(first_careunit)\n", + " time: col(intime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " icu_discharge:\n", + " code:\n", + " - ICU_DISCHARGE\n", + " - col(last_careunit)\n", + " time: col(outtime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + "icu/chartevents:\n", + " event:\n", + " code:\n", + " - LAB\n", + " - col(itemid)\n", + " - col(valueuom)\n", + " time: col(charttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " numeric_value: valuenum\n", + " text_value: value\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " _metadata:\n", + " d_labitems_to_loinc:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " valueuom: valueuom\n", + "icu/procedureevents:\n", + " start:\n", + " code:\n", + " - PROCEDURE\n", + " - START\n", + " - col(itemid)\n", + " time: col(starttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " _metadata:\n", + " proc_datetimeevents:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " proc_itemid:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: 
'{omop_vocabulary_id}/{omop_concept_code}'\n", + " end:\n", + " code:\n", + " - PROCEDURE\n", + " - END\n", + " - col(itemid)\n", + " time: col(endtime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " _metadata:\n", + " proc_datetimeevents:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " proc_itemid:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + "icu/inputevents:\n", + " input_start:\n", + " code:\n", + " - INFUSION_START\n", + " - col(itemid)\n", + " time: col(starttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " order_id: orderid\n", + " link_order_id: linkorderid\n", + " numeric_value: rate\n", + " unit: rateuom\n", + " ordercategorydescription: ordercategorydescription\n", + " _metadata:\n", + " inputevents_to_rxnorm:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " input_end:\n", + " code:\n", + " - INFUSION_END\n", + " - col(itemid)\n", + " time: col(endtime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " order_id: orderid\n", + " link_order_id: linkorderid\n", + " numeric_value: amount\n", + " ordercategorydescription: ordercategorydescription\n", + " statusdescription: statusdescription\n", + " unit: amountuom\n", + " _metadata:\n", + " inputevents_to_rxnorm:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + " subject_weight:\n", + " code:\n", + " - SUBJECT_WEIGHT_AT_INFUSION\n", 
+ " - KG\n", + " time: col(starttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " numeric_value: patientweight\n", + "icu/outputevents:\n", + " output:\n", + " code:\n", + " - SUBJECT_FLUID_OUTPUT\n", + " - col(itemid)\n", + " - col(valueuom)\n", + " time: col(charttime)\n", + " time_format: '%Y-%m-%d %H:%M:%S'\n", + " hadm_id: hadm_id\n", + " icustay_id: stay_id\n", + " numeric_value: value\n", + " _metadata:\n", + " outputevents_to_loinc:\n", + " description:\n", + " - omop_concept_name\n", + " - label\n", + " itemid: itemid (omop_source_code)\n", + " valueuom: unitname\n", + " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", + "\n", + "2024-12-14 16:07:10.871 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet\n", + "2024-12-14 16:07:10.871 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_diagnoses.parquet\n", + "2024-12-14 16:07:10.872 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:10.872428. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:10.874 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet\n", + "2024-12-14 16:07:10.874 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/extract_code_metadata.py:184: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " if metadata_df.schema[mandatory_col] is not mandatory_type:\n", + "2024-12-14 16:07:10.880 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 16:07:10.881 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 16:07:10.881 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_diagnoses.parquet\n", + "2024-12-14 16:07:10.932 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.059924\n", + "2024-12-14 16:07:10.932 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_diagnoses.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_diagnoses.parquet_cache/locks/2024-12-14T16:07:10.872428.json\n", + "2024-12-14 16:07:10.933 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv\n", + "2024-12-14 16:07:10.933 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_hcpcs.parquet\n", + "2024-12-14 16:07:10.933 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:10.933340. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:10.933 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv\n", + "2024-12-14 16:07:10.934 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:10.936 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 16:07:10.936 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_hcpcs.parquet\n", + "2024-12-14 16:07:10.967 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.034517\n", + "2024-12-14 16:07:10.967 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_hcpcs.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_hcpcs.parquet_cache/locks/2024-12-14T16:07:10.933340.json\n", + "2024-12-14 16:07:10.968 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv\n", + "2024-12-14 16:07:10.968 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/d_labitems_to_loinc.parquet\n", + "2024-12-14 16:07:10.969 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 
16:07:10.969008. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:10.969 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv\n", + "2024-12-14 16:07:10.969 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:10.972 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 16:07:10.972 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 16:07:10.973 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 16:07:10.974 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", + "2024-12-14 16:07:10.974 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/d_labitems_to_loinc.parquet\n", + "2024-12-14 16:07:10.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.027039\n", + "2024-12-14 16:07:10.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.d_labitems_to_loinc.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.d_labitems_to_loinc.parquet_cache/locks/2024-12-14T16:07:10.969008.json\n", + "2024-12-14 16:07:10.996 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet\n", + "2024-12-14 16:07:10.996 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_procedures.parquet\n", + "2024-12-14 16:07:10.997 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:10.997044. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:10.997 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet\n", + "2024-12-14 16:07:10.997 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:11.001 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 16:07:11.001 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 16:07:11.001 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_procedures.parquet\n", + "2024-12-14 16:07:11.028 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.031820\n", + "2024-12-14 16:07:11.028 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_procedures.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_procedures.parquet_cache/locks/2024-12-14T16:07:10.997044.json\n", + "2024-12-14 16:07:11.029 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv\n", + "2024-12-14 16:07:11.029 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/inputevents_to_rxnorm.parquet\n", + "2024-12-14 16:07:11.029 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:11.029852. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:11.030 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv\n", + "2024-12-14 16:07:11.030 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:11.032 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 16:07:11.032 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 16:07:11.033 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 16:07:11.033 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", + "2024-12-14 16:07:11.033 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/inputevents_to_rxnorm.parquet\n", + "2024-12-14 16:07:11.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.014199\n", + "2024-12-14 16:07:11.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.inputevents_to_rxnorm.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.inputevents_to_rxnorm.parquet_cache/locks/2024-12-14T16:07:11.029852.json\n", + "2024-12-14 16:07:11.044 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv\n", + "2024-12-14 16:07:11.044 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_itemid.parquet\n", + "2024-12-14 16:07:11.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:11.045049. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:11.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv\n", + "2024-12-14 16:07:11.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:11.047 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 16:07:11.047 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 16:07:11.048 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 16:07:11.048 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", + "2024-12-14 16:07:11.048 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_itemid.parquet\n", + "2024-12-14 16:07:11.056 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011186\n", + "2024-12-14 16:07:11.056 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_itemid.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_itemid.parquet_cache/locks/2024-12-14T16:07:11.045049.json\n", + "2024-12-14 16:07:11.056 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv\n", + "2024-12-14 16:07:11.056 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/outputevents_to_loinc.parquet\n", + "2024-12-14 16:07:11.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:11.057011. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:11.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv\n", + "2024-12-14 16:07:11.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:11.059 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 16:07:11.059 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 16:07:11.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/outputevents_to_loinc.parquet\n", + "2024-12-14 16:07:11.061 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.004929\n", + "2024-12-14 16:07:11.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.outputevents_to_loinc.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.outputevents_to_loinc.parquet_cache/locks/2024-12-14T16:07:11.057011.json\n", + "2024-12-14 16:07:11.062 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv\n", + "2024-12-14 16:07:11.062 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_datetimeevents.parquet\n", + "2024-12-14 16:07:11.063 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:11.062871. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:11.063 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv\n", + "2024-12-14 16:07:11.063 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:11.065 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 16:07:11.065 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 16:07:11.066 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 16:07:11.066 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", + "2024-12-14 16:07:11.066 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_datetimeevents.parquet\n", + "2024-12-14 16:07:11.072 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009717\n", + "2024-12-14 16:07:11.072 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_datetimeevents.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_datetimeevents.parquet_cache/locks/2024-12-14T16:07:11.062871.json\n", + "2024-12-14 16:07:11.072 | INFO | MEDS_transforms.extract.extract_code_metadata:main:398 - Extracted metadata for all events. Merging.\n", + "2024-12-14 16:07:11.072 | INFO | MEDS_transforms.extract.extract_code_metadata:main:404 - Starting reduction process\n", + "2024-12-14 16:07:11.072 | INFO | MEDS_transforms.extract.extract_code_metadata:main:412 - All map shards complete! 
Starting code metadata reduction computation.\n", + "2024-12-14 16:07:11.089 | INFO | MEDS_transforms.extract.extract_code_metadata:main:424 - Collected metadata for 2661 unique codes among 42898 total observations.\n", + "2024-12-14 16:07:11.124 | INFO | MEDS_transforms.extract.extract_code_metadata:main:449 - Finished reduction in 0:00:00.051941\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:11.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:11.221\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:12.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:12.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 16:07:11.807 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_metadata with the following configuration:\n", + "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", + "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- shard_events\n", + "- split_and_shard_subjects\n", + "- convert_to_sharded_events\n", + "- merge_to_MEDS_cohort\n", + "- extract_code_metadata\n", + "- finalize_MEDS_metadata\n", + "- finalize_MEDS_data\n", + "stage_configs:\n", + " shard_events:\n", + " row_chunksize: 200000000\n", + " infer_schema_length: 999999999\n", + " data_input_dir: ${input_dir}\n", + " split_and_shard_subjects:\n", + " is_metadata: true\n", + " output_dir: ${cohort_dir}/metadata\n", + " n_subjects_per_shard: 1000\n", + " external_splits_json_fp: null\n", + " split_fracs:\n", + " train: 0.8\n", + " tuning: 0.1\n", + " held_out: 0.1\n", + " convert_to_sharded_events:\n", + " do_dedup_text_and_numeric: true\n", + " merge_to_MEDS_cohort:\n", + " unique_by: '*'\n", + " additional_sort_by: null\n", + " extract_code_metadata:\n", + " is_metadata: true\n", + " description_separator: '\n", + "\n", + " '\n", + " finalize_MEDS_metadata:\n", + " is_metadata: true\n", + " do_retype: true\n", + " finalize_MEDS_data:\n", + " do_retype: true\n", + " split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: finalize_MEDS_metadata\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: MIMIC-IV\n", + " dataset_version: 2.2\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: extract\n", + "description: \"This pipeline extracts the MIMIC-IV dataset in longitudinal, sparse\\\n", + " \\ form from an input dataset meeting\\nselect criteria and converts them to the flattened,\\\n", + " \\ MEDS format. 
You can control the key arguments to this\\npipeline by setting environment\\\n", + " \\ variables:\\n```bash\\n export EVENT_CONVERSION_CONFIG_FP=# Path to your event\\\n", + " \\ conversion config\\n export MIMICIV_PRE_MEDS_DIR=# Path to the output dir of the\\\n", + " \\ pre-MEDS step\\n export MIMICIV_MEDS_COHORT_DIR=# Path to where you want the dataset\\\n", + " \\ to live\\n```\"\n", + "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", + "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", + "\n", + "2024-12-14 16:07:11.825 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "is_metadata: true\n", + "do_retype: true\n", + "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", + "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/finalize_MEDS_metadata\n", + "reducer_output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", + "train_only: true\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", + " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/finalize_MEDS_metadata\n", + " - metadata_input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", + "2024-12-14 16:07:11.830 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:170 - Validating code metadata\n", + "2024-12-14 16:07:11.830 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:173 - Reading code metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/codes.parquet\n", + "2024-12-14 16:07:12.007 | INFO | 
MEDS_transforms.extract.finalize_MEDS_metadata:main:183 - Writing finalized metadata df to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/codes.parquet\n", + "2024-12-14 16:07:12.016 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:187 - Creating dataset metadata\n", + "2024-12-14 16:07:12.019 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:199 - Writing finalized dataset metadata to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/dataset.json\n", + "2024-12-14 16:07:12.019 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:204 - Creating subject splits from {str(shards_map_fp.resolve())}\n", + "2024-12-14 16:07:12.019 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split train has 80 subjects\n", + "2024-12-14 16:07:12.020 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split tuning has 10 subjects\n", + "2024-12-14 16:07:12.020 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split held_out has 10 subjects\n", + "2024-12-14 16:07:12.022 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:222 - Writing finalized subject splits to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/subject_splits.parquet\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:12.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_data\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:12.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_data --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' 
stage=finalize_MEDS_data\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:13.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:07:13.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 16:07:12.623 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_data with the following configuration:\n", + "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", + "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- shard_events\n", + "- split_and_shard_subjects\n", + "- convert_to_sharded_events\n", + "- merge_to_MEDS_cohort\n", + "- extract_code_metadata\n", + "- finalize_MEDS_metadata\n", + "- finalize_MEDS_data\n", + "stage_configs:\n", + " shard_events:\n", + " row_chunksize: 200000000\n", + " infer_schema_length: 999999999\n", + " data_input_dir: ${input_dir}\n", + " split_and_shard_subjects:\n", + " is_metadata: true\n", + " output_dir: ${cohort_dir}/metadata\n", + " n_subjects_per_shard: 1000\n", + " external_splits_json_fp: null\n", + " split_fracs:\n", + " train: 0.8\n", + " tuning: 0.1\n", + " held_out: 0.1\n", + " convert_to_sharded_events:\n", + " do_dedup_text_and_numeric: true\n", + " merge_to_MEDS_cohort:\n", + " unique_by: '*'\n", + " additional_sort_by: null\n", + " extract_code_metadata:\n", + " is_metadata: true\n", + " description_separator: '\n", + "\n", + " '\n", + " finalize_MEDS_metadata:\n", + " is_metadata: true\n", + " do_retype: true\n", + " finalize_MEDS_data:\n", + " do_retype: 
true\n", + " split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: finalize_MEDS_data\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: MIMIC-IV\n", + " dataset_version: 2.2\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: extract\n", + "description: \"This pipeline extracts the MIMIC-IV dataset in longitudinal, sparse\\\n", + " \\ form from an input dataset meeting\\nselect criteria and converts them to the flattened,\\\n", + " \\ MEDS format. You can control the key arguments to this\\npipeline by setting environment\\\n", + " \\ variables:\\n```bash\\n export EVENT_CONVERSION_CONFIG_FP=# Path to your event\\\n", + " \\ conversion config\\n export MIMICIV_PRE_MEDS_DIR=# Path to the output dir of the\\\n", + " \\ pre-MEDS step\\n export MIMICIV_MEDS_COHORT_DIR=# Path to where you want the dataset\\\n", + " \\ to live\\n```\"\n", + "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", + "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", + "\n", + "2024-12-14 16:07:12.640 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "do_retype: true\n", + "is_metadata: false\n", + "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", + "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + "reducer_output_dir: null\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", + " - output_dir: ✅ 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - metadata_input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", + "2024-12-14 16:07:12.658 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", + "2024-12-14 16:07:12.667 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\n", + "2024-12-14 16:07:12.668 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:12.668203. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:12.669 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 16:07:12.670 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:12.762 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\n", + "2024-12-14 16:07:12.777 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.109013\n", + "2024-12-14 16:07:12.777 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/.0.parquet_cache/locks/2024-12-14T16:07:12.668203.json\n", + "2024-12-14 16:07:12.777 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\n", + "2024-12-14 16:07:12.778 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:12.777977. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:12.778 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 16:07:12.778 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:12.783 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\n", + "2024-12-14 16:07:12.802 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024486\n", + "2024-12-14 16:07:12.802 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/.0.parquet_cache/locks/2024-12-14T16:07:12.777977.json\n", + "2024-12-14 16:07:12.802 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\n", + "2024-12-14 16:07:12.803 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:12.803208. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:07:12.803 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 16:07:12.803 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:07:12.858 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\n", + "2024-12-14 16:07:13.051 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.248693\n", + "2024-12-14 16:07:13.052 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/.0.parquet_cache/locks/2024-12-14T16:07:12.803208.json\n", + "2024-12-14 16:07:13.052 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.411314\n", + "\u001b[0m\n" + ] + } + ], + "source": [ + "# Convert to MEDS\n", + "TUTORIAL_DIR = f\"{ROOT_DIR}/MIMIC-IV_Example\"\n", + "MIMICIV_RAW_DIR = f\"{ROOT_DIR}/raw_data/mimic-iv-demo/2.2\"\n", + "MIMICIV_PRE_MEDS_DIR = f\"{ROOT_DIR}/pre_meds/\"\n", + "MIMICIV_MEDS_DIR = f\"{ROOT_DIR}/meds/\"\n", + "\n", + "EVENT_CONVERSION_CONFIG_FP=f\"{ROOT_DIR}/MIMIC-IV_Example/configs/event_config.yaml\"\n", + "PIPELINE_CONFIG_PATH=f\"{ROOT_DIR}/MIMIC-IV_Example/configs/pipeline_config.yaml\"\n", + "!echo {TUTORIAL_DIR}\n", + "!cd {TUTORIAL_DIR} && bash {TUTORIAL_DIR}/run.sh {MIMICIV_RAW_DIR} {MIMICIV_PRE_MEDS_DIR} {MIMICIV_MEDS_DIR} do_unzip=true" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 443 + 
}, + "id": "u2f6socuWhjd", + "outputId": "289bc4ae-e374-4ed1-fd98-58c803f14e26" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (916_166, 4)
subject_idtimecodenumeric_value
i64datetime[μs]strf32
10012853null"GENDER//F"null
100128532084-01-01 00:00:00"MEDS_BIRTH"null
100128532175-04-05 00:00:00"Height (Inches)"null
100128532175-04-05 00:00:00"Weight (Lbs)"null
100128532175-04-05 00:00:00"BMI (kg/m2)"null
100398312116-01-09 11:00:00"LAB//50983//mEq/L"134.0
100398312116-01-09 11:00:00"LAB//50882//mEq/L"25.0
100398312116-01-09 11:00:00"LAB//50863//IU/L"112.0
100398312116-01-09 11:00:00"LAB//51237//UNK"3.3
100398312116-01-09 11:00:00"LAB//51274//sec"33.0
" + ], + "text/plain": [ + "shape: (916_166, 4)\n", + "┌────────────┬─────────────────────┬───────────────────┬───────────────┐\n", + "│ subject_id ┆ time ┆ code ┆ numeric_value │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ datetime[μs] ┆ str ┆ f32 │\n", + "╞════════════╪═════════════════════╪═══════════════════╪═══════════════╡\n", + "│ 10012853 ┆ null ┆ GENDER//F ┆ null │\n", + "│ 10012853 ┆ 2084-01-01 00:00:00 ┆ MEDS_BIRTH ┆ null │\n", + "│ 10012853 ┆ 2175-04-05 00:00:00 ┆ Height (Inches) ┆ null │\n", + "│ 10012853 ┆ 2175-04-05 00:00:00 ┆ Weight (Lbs) ┆ null │\n", + "│ 10012853 ┆ 2175-04-05 00:00:00 ┆ BMI (kg/m2) ┆ null │\n", + "│ … ┆ … ┆ … ┆ … │\n", + "│ 10039831 ┆ 2116-01-09 11:00:00 ┆ LAB//50983//mEq/L ┆ 134.0 │\n", + "│ 10039831 ┆ 2116-01-09 11:00:00 ┆ LAB//50882//mEq/L ┆ 25.0 │\n", + "│ 10039831 ┆ 2116-01-09 11:00:00 ┆ LAB//50863//IU/L ┆ 112.0 │\n", + "│ 10039831 ┆ 2116-01-09 11:00:00 ┆ LAB//51237//UNK ┆ 3.3 │\n", + "│ 10039831 ┆ 2116-01-09 11:00:00 ┆ LAB//51274//sec ┆ 33.0 │\n", + "└────────────┴─────────────────────┴───────────────────┴───────────────┘" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#@title Examine MEDS data\n", + "\n", + "import polars as pl\n", + "data = pl.read_parquet(f'{ROOT_DIR}/meds/data/**/*.parquet')\n", + "\n", + "data[['subject_id', 'time', 'code', 'numeric_value']]" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 479 + }, + "id": "CZD9xpSxd1Wp", + "outputId": "ea758e42-b71d-464f-f931-df7eec7a4415" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/fm/gwbmjrfx6gxg6x7xxcxbnrth0000gn/T/ipykernel_61345/1492712476.py:5: DeprecationWarning: `GroupBy.count` is deprecated. It has been renamed to `len`.\n", + " icd10_events.group_by('code').count().sort('count', descending=True)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "shape: (734, 2)
codecount
stru32
"DIAGNOSIS//ICD//10//E785"57
"DIAGNOSIS//ICD//10//E039"47
"DIAGNOSIS//ICD//10//Z794"37
"DIAGNOSIS//ICD//10//Z87891"35
"DIAGNOSIS//ICD//10//I2510"33
"DIAGNOSIS//ICD//10//M4856XA"1
"DIAGNOSIS//ICD//10//Z800"1
"DIAGNOSIS//ICD//10//M720"1
"DIAGNOSIS//ICD//10//Y92121"1
"DIAGNOSIS//ICD//10//T383X1A"1
" + ], + "text/plain": [ + "shape: (734, 2)\n", + "┌─────────────────────────────┬───────┐\n", + "│ code ┆ count │\n", + "│ --- ┆ --- │\n", + "│ str ┆ u32 │\n", + "╞═════════════════════════════╪═══════╡\n", + "│ DIAGNOSIS//ICD//10//E785 ┆ 57 │\n", + "│ DIAGNOSIS//ICD//10//E039 ┆ 47 │\n", + "│ DIAGNOSIS//ICD//10//Z794 ┆ 37 │\n", + "│ DIAGNOSIS//ICD//10//Z87891 ┆ 35 │\n", + "│ DIAGNOSIS//ICD//10//I2510 ┆ 33 │\n", + "│ … ┆ … │\n", + "│ DIAGNOSIS//ICD//10//M4856XA ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//Z800 ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//M720 ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//Y92121 ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//T383X1A ┆ 1 │\n", + "└─────────────────────────────┴───────┘" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#@title A Simple Polars Analysis\n", + "\n", + "icd10_events = data.filter(pl.col('code').str.starts_with('DIAGNOSIS//ICD//10//'))\n", + "\n", + "icd10_events.group_by('code').count().sort('count', descending=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 443 + }, + "id": "u7EXKCZelho-", + "outputId": "27e81b86-1195-4c6c-f7c7-993665b826d7" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (2_661, 6)
codedescriptionparent_codespossibly_cpt_codeitemidvalueuom
strstrlist[str]list[str]list[str]list[str]
"DIAGNOSIS//ICD//9//7916""Acetonuria"["ICD9CM/791.6"][null][null][null]
"INFUSION_START//229654""amiodarone Injection"["RxNorm/1663223"][null]["229654"][null]
"DIAGNOSIS//ICD//9//82021""Closed fracture of intertrocha…["ICD9CM/820.21"][null][null][null]
"DIAGNOSIS//ICD//10//S22068A""Other fracture of T7-T8 thorac…["ICD10CM/S22.068A"][null][null][null]
"DIAGNOSIS//ICD//9//9585""Traumatic anuria"["ICD9CM/958.5"][null][null][null]
"LAB//51307//UNK""CD13 cells/100 cells in Bone m…["LOINC/51237-6"][null]["51307"][null]
"PROCEDURE//ICD//9//3722""Left heart cardiac catheteriza…["ICD9Proc/37.22"][null][null][null]
"DIAGNOSIS//ICD//9//53560""Duodenitis, without mention of…["ICD9CM/535.60"][null][null][null]
"PROCEDURE//ICD//9//9671""Continuous invasive mechanical…["ICD9Proc/96.71"][null][null][null]
"DIAGNOSIS//ICD//10//Y92129""Unspecified place in nursing h…["ICD10CM/Y92.129"][null][null][null]
" + ], + "text/plain": [ + "shape: (2_661, 6)\n", + "┌──────────────────┬──────────────────┬─────────────────┬─────────────────┬────────────┬───────────┐\n", + "│ code ┆ description ┆ parent_codes ┆ possibly_cpt_co ┆ itemid ┆ valueuom │\n", + "│ --- ┆ --- ┆ --- ┆ de ┆ --- ┆ --- │\n", + "│ str ┆ str ┆ list[str] ┆ --- ┆ list[str] ┆ list[str] │\n", + "│ ┆ ┆ ┆ list[str] ┆ ┆ │\n", + "╞══════════════════╪══════════════════╪═════════════════╪═════════════════╪════════════╪═══════════╡\n", + "│ DIAGNOSIS//ICD// ┆ Acetonuria ┆ [\"ICD9CM/791.6\" ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 9//7916 ┆ ┆ ] ┆ ┆ ┆ │\n", + "│ INFUSION_START// ┆ amiodarone ┆ [\"RxNorm/166322 ┆ [null] ┆ [\"229654\"] ┆ [null] │\n", + "│ 229654 ┆ Injection ┆ 3\"] ┆ ┆ ┆ │\n", + "│ DIAGNOSIS//ICD// ┆ Closed fracture ┆ [\"ICD9CM/820.21 ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 9//82021 ┆ of intertrocha… ┆ \"] ┆ ┆ ┆ │\n", + "│ DIAGNOSIS//ICD// ┆ Other fracture ┆ [\"ICD10CM/S22.0 ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 10//S22068A ┆ of T7-T8 thorac… ┆ 68A\"] ┆ ┆ ┆ │\n", + "│ DIAGNOSIS//ICD// ┆ Traumatic anuria ┆ [\"ICD9CM/958.5\" ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 9//9585 ┆ ┆ ] ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ LAB//51307//UNK ┆ CD13 cells/100 ┆ [\"LOINC/51237-6 ┆ [null] ┆ [\"51307\"] ┆ [null] │\n", + "│ ┆ cells in Bone m… ┆ \"] ┆ ┆ ┆ │\n", + "│ PROCEDURE//ICD// ┆ Left heart ┆ [\"ICD9Proc/37.2 ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 9//3722 ┆ cardiac ┆ 2\"] ┆ ┆ ┆ │\n", + "│ ┆ catheteriza… ┆ ┆ ┆ ┆ │\n", + "│ DIAGNOSIS//ICD// ┆ Duodenitis, ┆ [\"ICD9CM/535.60 ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 9//53560 ┆ without mention ┆ \"] ┆ ┆ ┆ │\n", + "│ ┆ of… ┆ ┆ ┆ ┆ │\n", + "│ PROCEDURE//ICD// ┆ Continuous ┆ [\"ICD9Proc/96.7 ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 9//9671 ┆ invasive ┆ 1\"] ┆ ┆ ┆ │\n", + "│ ┆ mechanical… ┆ ┆ ┆ ┆ │\n", + "│ DIAGNOSIS//ICD// ┆ Unspecified ┆ [\"ICD10CM/Y92.1 ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 10//Y92129 ┆ place in nursing ┆ 29\"] ┆ ┆ ┆ │\n", + "│ ┆ h… ┆ ┆ ┆ ┆ │\n", + 
"└──────────────────┴──────────────────┴─────────────────┴─────────────────┴────────────┴───────────┘" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pl.read_parquet(f\"{ROOT_DIR}/meds/metadata/codes.parquet\")\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/\n" + ] + } + ], + "source": [ + "#@title Download E-ICU demo\n", + "import tempfile\n", + "import os\n", + "from pathlib import Path\n", + "temp_dir = tempfile.TemporaryDirectory()\n", + "notebook_dir = os.getcwd()\n", + "\n", + "ROOT_DIR=f\"{notebook_dir}/work_dir/eicu_demo/\"\n", + "Path(ROOT_DIR).mkdir(parents=True, exist_ok=True)\n", + "\n", + "!echo {ROOT_DIR}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Download pre-meds script, event config (defining how raw data is converted to meds data), and meds-transform config\n", + "!mkdir {ROOT_DIR}/meds-transform/\n", + "!git clone --depth 1 https://github.com/mmcdermott/MEDS_transforms.git {ROOT_DIR}/tmp/\n", + "!mv {ROOT_DIR}/tmp/eICU_Example {ROOT_DIR}/eICU_Example\n", + "# Override configs, we remove the column apneaparms and the whole infusionDrug table as they are not in the demo\n", + "!cp configs/extract_eICU.yaml {ROOT_DIR}/eICU_Example/configs/.\n", + "!cp configs/table_preprocessors.yaml {ROOT_DIR}/eICU_Example/configs/.\n", + "!cp configs/event_configs.yaml {ROOT_DIR}/eICU_Example/configs/." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "!wget -q -r -N -c --no-host-directories --cut-dirs=1 -np -P {ROOT_DIR}/raw_data https://physionet.org/files/eicu-crd-demo/2.0.1/" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo//eICU_Example\n", + "Note that eICU has a lot more observations per subject than does MIMIC-IV, so to keep to a reasonable \n", + "memory burden (e.g., < 150GB per worker), you will want a smaller shard size, as well as to turn off \n", + "the final unique check (which should not be necessary given the structure of eICU and is expensive) \n", + "in the merge stage. You can do this by setting the following parameters at the end of the mandatory \n", + "args when running this script:\n", + " * stage_configs.split_and_shard_subjects.n_subjects_per_shard=10000\n", + " * stage_configs.merge_to_MEDS_cohort.unique_by=null\n", + "Additionally, consider reducing N_PARALLEL_WORKERS if > 1\n", + "Skipping unzipping.\n", + "Running pre-MEDS conversion.\n", + "\u001b[32m2024-12-14 16:18:06.881\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m278\u001b[0m - \u001b[1mLoading table preprocessors from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/table_preprocessors.yaml...\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for admissionDx:\n", + "offset_col: admitdxenteredoffset\n", + "pseudotime_col: admitDxEnteredTimestamp\n", + "output_data_cols:\n", + "- admitdxname\n", + "- admissiondxid\n", + "warning_items:\n", + "- How should we use `admitdxtest`?\n", + "- How should we use 
`admitdxpath`?\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for allergy:\n", + "offset_col: allergyenteredoffset\n", + "pseudotime_col: allergyEnteredTimestamp\n", + "output_data_cols:\n", + "- allergytype\n", + "- allergyname\n", + "warning_items:\n", + "- How should we use `allergyNoteType`?\n", + "- How should we use `specialtyType`?\n", + "- How should we use `userType`?\n", + "- Is `drugName` the name of the drug to which the patient is allergic or the drug\n", + " given to the patient (docs say 'name of the selected admission drug')?\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanGeneral:\n", + "offset_col: cplitemoffset\n", + "pseudotime_col: carePlanGeneralItemEnteredTimestamp\n", + "output_data_cols:\n", + "- cplgroup\n", + "- cplitemvalue\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanEOL:\n", + "offset_col: cpleoldiscussionoffset\n", + "pseudotime_col: carePlanEolDiscussionOccurredTimestamp\n", + "warning_items:\n", + "- Is the DiscussionOffset time actually reliable? 
Should we fall back on the SaveOffset\n", + " time?\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanGoal:\n", + "offset_col: cplgoaloffset\n", + "pseudotime_col: carePlanGoalEnteredTimestamp\n", + "output_data_cols:\n", + "- cplgoalcategory\n", + "- cplgoalvalue\n", + "- cplgoalstatus\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanInfectiousDisease:\n", + "offset_col: cplinfectdiseaseoffset\n", + "pseudotime_col: carePlanInfectDiseaseEnteredTimestamp\n", + "output_data_cols:\n", + "- infectdiseasesite\n", + "- infectdiseaseassessment\n", + "- responsetotherapy\n", + "- treatment\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for diagnosis:\n", + "offset_col: diagnosisoffset\n", + "pseudotime_col: diagnosisEnteredTimestamp\n", + "output_data_cols:\n", + "- icd9code\n", + "- diagnosispriority\n", + "- diagnosisstring\n", + "warning_items:\n", + "- Though we use it, the `diagnosisString` field documentation is unclear -- by what\n", + " is it separated?\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for infusionDrug:\n", + "offset_col: infusionoffset\n", + "pseudotime_col: infusionEnteredTimestamp\n", + "output_data_cols:\n", + "- infusiondrugid\n", + "- drugname\n", + "- drugrate\n", + "- infusionrate\n", + "- drugamount\n", + "- volumeoffluid\n", + "- patientweight\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.903\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for lab:\n", + "offset_col: labresultoffset\n", + "pseudotime_col: labResultDrawnTimestamp\n", + "output_data_cols:\n", + "- labname\n", + "- labresult\n", + "- labresulttext\n", + "- labmeasurenamesystem\n", + "- labmeasurenameinterface\n", + "- labtypeid\n", + "warning_items:\n", + "- Is this the time the lab was drawn? Entered? The time the result came in?\n", + "- We **IGNORE** the `labResultRevisedOffset` column -- this may be a mistake!\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for medication:\n", + "offset_col:\n", + "- drugorderoffset\n", + "- drugstartoffset\n", + "- drugstopoffset\n", + "pseudotime_col:\n", + "- drugordertimestamp\n", + "- drugstarttimestamp\n", + "- drugstoptimestamp\n", + "output_data_cols:\n", + "- medicationid\n", + "- drugivadmixture\n", + "- drugname\n", + "- drughiclseqno\n", + "- dosage\n", + "- routeadmin\n", + "- frequency\n", + "- loadingdose\n", + "- prn\n", + "- gtc\n", + "warning_items:\n", + "- We **IGNORE** the `drugOrderCancelled` column -- this may be a mistake!\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseAssessment:\n", + "offset_col:\n", + "- nurseassessoffset\n", + "- nurseassessentryoffset\n", + "pseudotime_col:\n", + "- nurseAssessPerformedTimestamp\n", + "- nurseAssessEnteredTimestamp\n", + "output_data_cols:\n", + "- nurseassessid\n", + "- celllabel\n", + "- cellattribute\n", + "- cellattributevalue\n", + "warning_items:\n", + "- Should we be using `cellAttributePath` instead of `cellAttribute`?\n", + "- SOME MAY BE LISTS\n", + "\u001b[0m\n", + 
"\u001b[32m2024-12-14 16:18:06.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseCare:\n", + "offset_col:\n", + "- nursecareoffset\n", + "- nursecareentryoffset\n", + "pseudotime_col:\n", + "- nurseCarePerformedTimestamp\n", + "- nurseCareEnteredTimestamp\n", + "output_data_cols:\n", + "- nursecareid\n", + "- celllabel\n", + "- cellattribute\n", + "- cellattributevalue\n", + "warning_items:\n", + "- Should we be using `cellAttributePath` instead of `cellAttribute`?\n", + "- SOME MAY BE LISTS\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseCharting:\n", + "offset_col:\n", + "- nursingchartoffset\n", + "- nursingchartentryoffset\n", + "pseudotime_col:\n", + "- nursingChartPerformedTimestamp\n", + "- nursingChartEnteredTimestamp\n", + "output_data_cols:\n", + "- nursingchartid\n", + "- nursingchartcelltypecat\n", + "- nursingchartcelltypevalname\n", + "- nursingchartcelltypevallabel\n", + "- nursingchartvalue\n", + "warning_items:\n", + "- SOME MAY BE LISTS\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for pastHistory:\n", + "offset_col:\n", + "- pasthistoryoffset\n", + "- pasthistoryenteredoffset\n", + "pseudotime_col:\n", + "- pastHistoryTakenTimestamp\n", + "- pastHistoryEnteredTimestamp\n", + "output_data_cols:\n", + "- pasthistoryid\n", + "- pasthistorynotetype\n", + "- pasthistorypath\n", + "- pasthistoryvalue\n", + "- pasthistoryvaluetext\n", + "warning_items:\n", + "- SOME MAY BE LISTS\n", + "- How should we use `pastHistoryPath` vs. `pastHistoryNoteType`?\n", + "- How should we use `pastHistoryValue` vs. 
`pastHistoryValueText`?\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for physicalExam:\n", + "offset_col: physicalexamoffset\n", + "pseudotime_col: physicalExamEnteredTimestamp\n", + "output_data_cols:\n", + "- physicalexamid\n", + "- physicalexamtext\n", + "- physicalexampath\n", + "- physicalexamvalue\n", + "warning_items:\n", + "- How should we use `physicalExamValue` vs. `physicalExamText`?\n", + "- I believe the `physicalExamValue` is a **LIST**. This must be processed specially.\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for respiratoryCare:\n", + "offset_col:\n", + "- respcarestatusoffset\n", + "- ventstartoffset\n", + "- ventendoffset\n", + "pseudotime_col:\n", + "- respCareStatusEnteredTimestamp\n", + "- ventStartTimestamp\n", + "- ventEndTimestamp\n", + "output_data_cols:\n", + "- respcareid\n", + "- airwaytype\n", + "- airwaysize\n", + "- airwayposition\n", + "- cuffpressure\n", + "- lowexhmvlimit\n", + "- hiexhmvlimit\n", + "- lowexhtvlimit\n", + "- hipeakpreslimit\n", + "- lowpeakpreslimit\n", + "- hirespratelimit\n", + "- lowrespratelimit\n", + "- sighpreslimit\n", + "- lowironoxlimit\n", + "- highironoxlimit\n", + "- meanairwaypreslimit\n", + "- peeplimit\n", + "- cpaplimit\n", + "- setapneainterval\n", + "- setapneatv\n", + "- setapneaippeephigh\n", + "- setapnearr\n", + "- setapneapeakflow\n", + "- setapneainsptime\n", + "- setapneaie\n", + "- setapneafio2\n", + "warning_items:\n", + "- We ignore the `priorVent*` columns -- this may be a mistake!\n", + "- There is a lot of data in this table -- what should be incorporated into the event\n", + " structure?\n", + "- We might be able to use `priorVent` timestamps to further refine true 
season of\n", + " unit admission.\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for respiratoryCharting:\n", + "offset_col:\n", + "- respchartoffset\n", + "- respchartentryoffset\n", + "pseudotime_col:\n", + "- respChartPerformedTimestamp\n", + "- respChartEnteredTimestamp\n", + "output_data_cols:\n", + "- respchartid\n", + "- respcharttypecat\n", + "- respchartvaluelabel\n", + "- respchartvalue\n", + "warning_items:\n", + "- SOME MAY BE LISTS\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for treatment:\n", + "offset_col: treatmentoffset\n", + "pseudotime_col: treatmentEnteredTimestamp\n", + "output_data_cols:\n", + "- treatmentid\n", + "- treatmentstring\n", + "warning_items:\n", + "- Absence of entries in table do not indicate absence of treatments\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for vitalAperiodic:\n", + "offset_col: observationoffset\n", + "pseudotime_col: observationEnteredTimestamp\n", + "output_data_cols:\n", + "- vitalaperiodicid\n", + "- noninvasivesystolic\n", + "- noninvasivediastolic\n", + "- noninvasivemean\n", + "- paop\n", + "- cardiacoutput\n", + "- cardiacinput\n", + "- svr\n", + "- svri\n", + "- pvr\n", + "- pvri\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for vitalPeriodic:\n", + "offset_col: observationoffset\n", + "pseudotime_col: observationEnteredTimestamp\n", + "output_data_cols:\n", + "- vitalperiodicid\n", + "- 
temperature\n", + "- sao2\n", + "- heartrate\n", + "- respiration\n", + "- cvp\n", + "- etco2\n", + "- systemicsystolic\n", + "- systemicdiastolic\n", + "- systemicmean\n", + "- pasystolic\n", + "- padiastolic\n", + "- pamean\n", + "- st1\n", + "- st2\n", + "- st3\n", + "- icp\n", + "warning_items:\n", + "- These are 5-minute median values. There are going to be a *lot* of events.\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:06.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mReloading processed patient df from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet\u001b[0m\n", + "Done with diagnosis. Continuing\n", + "Done with vitalAperiodic. Continuing\n", + "Done with admissionDx. Continuing\n", + "Done with respiratoryCare. Continuing\n", + "Done with nurseAssessment. Continuing\n", + "Done with vitalPeriodic. Continuing\n", + "Done with carePlanGeneral. Continuing\n", + "Done with carePlanGoal. Continuing\n", + "Done with treatment. Continuing\n", + "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apacheApsVar as it is not supported in this pipeline.\u001b[0m\n", + "Done with carePlanEOL. Continuing\n", + "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m331\u001b[0m - \u001b[33m\u001b[1mNo function needed for infusiondrug. 
For eICU, THIS IS UNEXPECTED\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping carePlanCareProvider as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping microLab as it is not supported in this pipeline.\u001b[0m\n", + "Done with nurseCare. Continuing\n", + "Done with physicalExam. Continuing\n", + "Done with respiratoryCharting. Continuing\n", + "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping note as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping admissiondrug as it is not supported in this pipeline.\u001b[0m\n", + "Done with lab. 
Continuing\n", + "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apachePredVar as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping customLab as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apachePatientResult as it is not supported in this pipeline.\u001b[0m\n", + "Done with carePlanInfectiousDisease. Continuing\n", + "Done with allergy. Continuing\n", + "Done with nurseCharting. Continuing\n", + "Done with pastHistory. Continuing\n", + "Done with medication. Continuing\n", + "\u001b[32m2024-12-14 16:18:07.014\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping intakeOutput as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:07.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m352\u001b[0m - \u001b[1mDone! 
All dataframes processed and written to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds\u001b[0m\n", + "Setting N_WORKERS to 1 to avoid issues with the runners.\n", + "Running extraction pipeline.\n", + "\u001b[32m2024-12-14 16:18:07.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m324\u001b[0m - \u001b[1mSkipping stage shard_events as it is already complete.\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:07.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m324\u001b[0m - \u001b[1mSkipping stage split_and_shard_subjects as it is already complete.\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:07.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m324\u001b[0m - \u001b[1mSkipping stage convert_to_sharded_events as it is already complete.\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:07.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: merge_to_MEDS_cohort\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:07.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-merge_to_MEDS_cohort --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=merge_to_MEDS_cohort\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:53.120\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:53.121\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 16:18:08.184 | INFO | MEDS_transforms.utils:stage_init:73 - Running merge_to_MEDS_cohort with the following configuration:\n", + "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", + "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- shard_events\n", + "- split_and_shard_subjects\n", + "- convert_to_sharded_events\n", + "- merge_to_MEDS_cohort\n", + "- finalize_MEDS_metadata\n", + "- finalize_MEDS_data\n", + "stage_configs:\n", + " shard_events:\n", + " row_chunksize: 200000000\n", + " infer_schema_length: 999999999\n", + " data_input_dir: ${input_dir}\n", + " split_and_shard_subjects:\n", + " is_metadata: true\n", + " output_dir: ${cohort_dir}/metadata\n", + " n_subjects_per_shard: 10000\n", + " external_splits_json_fp: null\n", + " split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + " convert_to_sharded_events:\n", + " do_dedup_text_and_numeric: true\n", + " merge_to_MEDS_cohort:\n", + " unique_by: null\n", + " additional_sort_by: null\n", + " extract_code_metadata:\n", + " is_metadata: true\n", + " description_separator: '\n", + "\n", + " '\n", + " finalize_MEDS_metadata:\n", + " is_metadata: true\n", + " do_retype: true\n", + " finalize_MEDS_data:\n", + " do_retype: true\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: merge_to_MEDS_cohort\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: eICU\n", + " dataset_version: 2.0\n", + " package_name: ${get_package_name:}\n", + " 
package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: extract\n", + "description: \"This pipeline extracts the eICU dataset in longitudinal, sparse form\\\n", + " \\ from an input dataset meeting\\nselect criteria and converts them to the flattened,\\\n", + " \\ MEDS format. You can control the key arguments to this\\npipeline by setting environment\\\n", + " \\ variables:\\n```bash\\n export EVENT_CONVERSION_CONFIG_FP=# Path to your event\\\n", + " \\ conversion config\\n export EICU_PRE_MEDS_DIR=# Path to the output dir of the\\\n", + " \\ pre-MEDS step\\n export EICU_MEDS_COHORT_DIR=# Path to where you want the dataset\\\n", + " \\ to live\\n```\"\n", + "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", + "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", + "\n", + "2024-12-14 16:18:08.199 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "unique_by: null\n", + "additional_sort_by: null\n", + "is_metadata: false\n", + "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events\n", + "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", + "reducer_output_dir: null\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events\n", + " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", + " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "2024-12-14 16:18:08.214 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator_by_shard_map:687 - Mapping computation over a maximum of 3 shards\n", + "2024-12-14 16:18:08.222 | INFO | 
MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 16:18:08.224 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:302 - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet is in progress as of 2024-12-14 16:17:18.058267. Returning.\n", + "2024-12-14 16:18:08.224 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 16:18:08.224 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:18:08.224594. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:18:08.225 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0\n", + "2024-12-14 16:18:08.226 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/[0-2520).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/[0-7578).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/[0-2475).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/[0-33148).parquet\n", + " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/[0-15).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/[0-3633).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/[0-112).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/[0-24978).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/[0-434660).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/[0-75604).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/[0-91589).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/[0-42080).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/[0-1477163).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/[0-12109).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/[0-84058).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/[0-5436).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/[0-176089).parquet\n", + " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/[0-38290).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/[0-274088).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 16:18:08.236 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:18:08.237 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 16:18:20.068 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:11.843473\n", + "2024-12-14 16:18:20.068 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache/locks/2024-12-14T16:18:08.224594.json\n", + "2024-12-14 16:18:20.069 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 16:18:20.070 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:18:20.070463. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 16:18:20.070 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0\n", + "2024-12-14 16:18:20.072 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/[0-2520).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/[0-7578).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/[0-2475).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/[0-33148).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/[0-15).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/[0-3633).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/[0-112).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/[0-24978).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/[0-434660).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/[0-75604).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/[0-91589).parquet\n", + " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/[0-42080).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/[0-1477163).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/[0-12109).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/[0-84058).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/[0-5436).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/[0-176089).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/[0-38290).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/[0-274088).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 16:18:20.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:18:20.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 16:18:52.696 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:32.625813\n", + "2024-12-14 16:18:52.698 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache/locks/2024-12-14T16:18:20.070463.json\n", + "2024-12-14 16:18:52.698 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:44.499135\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:53.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:53.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:53.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:53.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 16:18:53.861 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_metadata with the following configuration:\n", + "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", + "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- shard_events\n", + "- split_and_shard_subjects\n", + "- convert_to_sharded_events\n", + "- merge_to_MEDS_cohort\n", + "- finalize_MEDS_metadata\n", + "- finalize_MEDS_data\n", + "stage_configs:\n", + " shard_events:\n", + " row_chunksize: 200000000\n", + " infer_schema_length: 999999999\n", + " data_input_dir: ${input_dir}\n", + " split_and_shard_subjects:\n", + " is_metadata: true\n", + " output_dir: ${cohort_dir}/metadata\n", + " n_subjects_per_shard: 10000\n", + " external_splits_json_fp: null\n", + " split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + " convert_to_sharded_events:\n", + " do_dedup_text_and_numeric: true\n", + " merge_to_MEDS_cohort:\n", + " unique_by: null\n", + " additional_sort_by: null\n", + " extract_code_metadata:\n", + " is_metadata: true\n", + " description_separator: '\n", + "\n", + " '\n", + " finalize_MEDS_metadata:\n", + " is_metadata: true\n", + " do_retype: true\n", + " finalize_MEDS_data:\n", + " do_retype: true\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: finalize_MEDS_metadata\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: eICU\n", + " dataset_version: 2.0\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: extract\n", + "description: \"This pipeline extracts the eICU dataset in longitudinal, sparse form\\\n", + " \\ from an input dataset meeting\\nselect criteria and converts them to the flattened,\\\n", + " \\ MEDS format. 
You can control the key arguments to this\\npipeline by setting environment\\\n", + " \\ variables:\\n```bash\\n export EVENT_CONVERSION_CONFIG_FP=# Path to your event\\\n", + " \\ conversion config\\n export EICU_PRE_MEDS_DIR=# Path to the output dir of the\\\n", + " \\ pre-MEDS step\\n export EICU_MEDS_COHORT_DIR=# Path to where you want the dataset\\\n", + " \\ to live\\n```\"\n", + "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", + "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", + "\n", + "2024-12-14 16:18:53.877 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "is_metadata: true\n", + "do_retype: true\n", + "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", + "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/finalize_MEDS_metadata\n", + "reducer_output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata\n", + "train_only: true\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", + " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/finalize_MEDS_metadata\n", + " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "2024-12-14 16:18:53.881 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:170 - Validating code metadata\n", + "2024-12-14 16:18:53.881 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:179 - No code metadata found at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects/codes.parquet. 
Making empty metadata file.\n", + "2024-12-14 16:18:53.883 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:183 - Writing finalized metadata df to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/codes.parquet\n", + "2024-12-14 16:18:53.885 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:187 - Creating dataset metadata\n", + "2024-12-14 16:18:53.887 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:199 - Writing finalized dataset metadata to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/dataset.json\n", + "2024-12-14 16:18:53.887 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:204 - Creating subject splits from {str(shards_map_fp.resolve())}\n", + "2024-12-14 16:18:53.888 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split train has 1087 subjects\n", + "2024-12-14 16:18:53.888 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split tuning has 544 subjects\n", + "2024-12-14 16:18:53.888 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split held_out has 543 subjects\n", + "2024-12-14 16:18:53.889 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:222 - Writing finalized subject splits to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/subject_splits.parquet\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:53.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_data\u001b[0m\n", + "\u001b[32m2024-12-14 16:18:53.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_data --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 
'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_data\u001b[0m\n", + "\u001b[32m2024-12-14 16:19:11.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 16:19:11.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 16:18:54.479 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_data with the following configuration:\n", + "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", + "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- shard_events\n", + "- split_and_shard_subjects\n", + "- convert_to_sharded_events\n", + "- merge_to_MEDS_cohort\n", + "- finalize_MEDS_metadata\n", + "- finalize_MEDS_data\n", + "stage_configs:\n", + " shard_events:\n", + " row_chunksize: 200000000\n", + " infer_schema_length: 999999999\n", + " data_input_dir: ${input_dir}\n", + " split_and_shard_subjects:\n", + " is_metadata: true\n", + " output_dir: ${cohort_dir}/metadata\n", + " n_subjects_per_shard: 10000\n", + " external_splits_json_fp: null\n", + " split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + " convert_to_sharded_events:\n", + " do_dedup_text_and_numeric: true\n", + " merge_to_MEDS_cohort:\n", + " unique_by: null\n", + " additional_sort_by: null\n", + " extract_code_metadata:\n", + " is_metadata: true\n", + " description_separator: '\n", + "\n", + " '\n", + " finalize_MEDS_metadata:\n", + " is_metadata: true\n", + " do_retype: true\n", + " finalize_MEDS_data:\n", + " 
do_retype: true\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: finalize_MEDS_data\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: eICU\n", + " dataset_version: 2.0\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: extract\n", + "description: \"This pipeline extracts the eICU dataset in longitudinal, sparse form\\\n", + " \\ from an input dataset meeting\\nselect criteria and converts them to the flattened,\\\n", + " \\ MEDS format. You can control the key arguments to this\\npipeline by setting environment\\\n", + " \\ variables:\\n```bash\\n export EVENT_CONVERSION_CONFIG_FP=# Path to your event\\\n", + " \\ conversion config\\n export EICU_PRE_MEDS_DIR=# Path to the output dir of the\\\n", + " \\ pre-MEDS step\\n export EICU_MEDS_COHORT_DIR=# Path to where you want the dataset\\\n", + " \\ to live\\n```\"\n", + "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", + "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", + "\n", + "2024-12-14 16:18:54.495 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "do_retype: true\n", + "is_metadata: false\n", + "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", + "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data\n", + "reducer_output_dir: null\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", + " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data\n", + " - metadata_input_dir: ✅ 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata\n", + "2024-12-14 16:18:54.511 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 2 shards\n", + "2024-12-14 16:18:54.519 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/0.parquet\n", + "2024-12-14 16:18:54.520 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:18:54.520021. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:18:54.521 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 16:18:54.522 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:18:56.160 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/0.parquet\n", + "2024-12-14 16:18:59.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:04.605576\n", + "2024-12-14 16:18:59.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/.0.parquet_cache/locks/2024-12-14T16:18:54.520021.json\n", + "2024-12-14 16:18:59.331 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet into 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/0.parquet\n", + "2024-12-14 16:18:59.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:18:59.331903. Double checking no earlier locks have been registered.\n", + "2024-12-14 16:18:59.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 16:18:59.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 16:19:03.318 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/0.parquet\n", + "2024-12-14 16:19:10.540 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:11.208637\n", + "2024-12-14 16:19:10.540 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/.0.parquet_cache/locks/2024-12-14T16:18:59.331903.json\n", + "2024-12-14 16:19:11.376 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:16.881477\n", + "\u001b[0m\n" + ] + } + ], + "source": [ + "# Convert to MEDS\n", + "TUTORIAL_DIR = f\"{ROOT_DIR}/eICU_Example\"\n", + "MIMICIV_RAW_DIR = f\"{ROOT_DIR}/raw_data/eicu-crd-demo/2.0.1\"\n", + "MIMICIV_PRE_MEDS_DIR = f\"{ROOT_DIR}/pre_meds/\"\n", + "MIMICIV_MEDS_DIR = f\"{ROOT_DIR}/meds/\"\n", + "\n", + "EVENT_CONVERSION_CONFIG_FP=f\"{ROOT_DIR}/eICU_Example/configs/event_config.yaml\"\n", + "PIPELINE_CONFIG_PATH=f\"{ROOT_DIR}/eICU_Example/configs/pipeline_config.yaml\"\n", + "!echo {TUTORIAL_DIR}\n", + "!cd {TUTORIAL_DIR} && bash run.sh {MIMICIV_RAW_DIR} 
{MIMICIV_PRE_MEDS_DIR} {MIMICIV_MEDS_DIR} do_unzip=true" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (25_221_384, 4)
subject_idtimecodenumeric_value
i64datetime[μs]strf32
129391null"GENDER//Female"null
129391null"ETHNICITY//Caucasian"null
1293911929-06-29 23:44:00"MEDS_BIRTH"null
1293911929-07-01 13:14:00"MEDS_BIRTH"null
1293912015-12-29 20:34:00"NURSE_CHARTING//PERFORMED//NOT…null
27362412015-12-31 13:00:00"NURSE_CHARTING//ENTERED//NOT Y…null
27362412015-12-31 13:00:00"RESP_CARE//STATUS//NOT YET DON…null
27362412015-12-31 19:29:00"HOSPITAL_DISCHARGE//Alive//Hom…null
27362412015-12-31 23:43:00"MEDICATION//STOPPED//MORPHINE …null
27362412016-01-01 13:25:00"MEDICATION//STOPPED//KETOROLAC…null
" + ], + "text/plain": [ + "shape: (25_221_384, 4)\n", + "┌────────────┬─────────────────────┬─────────────────────────────────┬───────────────┐\n", + "│ subject_id ┆ time ┆ code ┆ numeric_value │\n", + "│ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ datetime[μs] ┆ str ┆ f32 │\n", + "╞════════════╪═════════════════════╪═════════════════════════════════╪═══════════════╡\n", + "│ 129391 ┆ null ┆ GENDER//Female ┆ null │\n", + "│ 129391 ┆ null ┆ ETHNICITY//Caucasian ┆ null │\n", + "│ 129391 ┆ 1929-06-29 23:44:00 ┆ MEDS_BIRTH ┆ null │\n", + "│ 129391 ┆ 1929-07-01 13:14:00 ┆ MEDS_BIRTH ┆ null │\n", + "│ 129391 ┆ 2015-12-29 20:34:00 ┆ NURSE_CHARTING//PERFORMED//NOT… ┆ null │\n", + "│ … ┆ … ┆ … ┆ … │\n", + "│ 2736241 ┆ 2015-12-31 13:00:00 ┆ NURSE_CHARTING//ENTERED//NOT Y… ┆ null │\n", + "│ 2736241 ┆ 2015-12-31 13:00:00 ┆ RESP_CARE//STATUS//NOT YET DON… ┆ null │\n", + "│ 2736241 ┆ 2015-12-31 19:29:00 ┆ HOSPITAL_DISCHARGE//Alive//Hom… ┆ null │\n", + "│ 2736241 ┆ 2015-12-31 23:43:00 ┆ MEDICATION//STOPPED//MORPHINE … ┆ null │\n", + "│ 2736241 ┆ 2016-01-01 13:25:00 ┆ MEDICATION//STOPPED//KETOROLAC… ┆ null │\n", + "└────────────┴─────────────────────┴─────────────────────────────────┴───────────────┘" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#@title Examine MEDS data\n", + "\n", + "import polars as pl\n", + "data = pl.read_parquet(f'{ROOT_DIR}/meds/data/**/*.parquet')\n", + "\n", + "data[['subject_id', 'time', 'code', 'numeric_value']]" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git 
a/demo/meds_cehrbert.py b/demo/meds_cehrbert.py new file mode 100644 index 0000000..de1c37f --- /dev/null +++ b/demo/meds_cehrbert.py @@ -0,0 +1,397 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.16.4 +# kernelspec: +# display_name: Python 3 +# name: python3 +# --- + +# %% [Colab-only] Switch Colab to python 3.12 +# !sudo apt-get install python3.12 python3.12-venv +# import sys +# !python3.12 -m venv meds_env +# import os +# os.environ['PATH'] = '/content/meds_env/bin:' + os.environ['PATH'] +# !pip install --upgrade pip + +# # Then in a new code cell: +# import sys +# sys.executable = '/content/meds_env/bin/python' + +# # Confirm python version is 3.12 +# !python --version + +# %% [markdown] +# ## Install dependencies + +# %% +!pip -q install meds_etl==0.3.6 meds_transforms==0.0.7 + +# %% [markdown] +# # Download MIMIC-IV demo + +# %% +# macOS users should install wget (e.g. through brew) +!wget -q -r -N -c --no-host-directories --cut-dirs=1 -np -P ./content/download https://physionet.org/files/mimic-iv-demo/2.2/ + +# %% +# Download pre-meds script, event config (defining how raw data is converted to meds data), and meds-transform config +!mkdir -p ./content/meds-transform/ +!git clone --depth 1 https://github.com/mmcdermott/MEDS_transforms.git ./content/tmp/ +!mv ./content/tmp/MIMIC-IV_Example ./content/MIMIC-IV_Example + +# %% +# Download MIMIC IV metadata +MIMICIV_RAW_DIR = "https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map" +MIMICIV_PRE_MEDS_DIR = "./content/pre_meds/" +!mkdir {MIMICIV_PRE_MEDS_DIR} + +OUTPUT_DIR = "./content/download/mimic-iv-demo/2.2" + +files = [ + 'd_labitems_to_loinc.csv', + 'inputevents_to_rxnorm.csv', + 'lab_itemid_to_loinc.csv', + 'meas_chartevents_main.csv', + 'meas_chartevents_value.csv', + 'numerics-summary.csv', + 'outputevents_to_loinc.csv', + 'proc_datetimeevents.csv', + 'proc_itemid.csv', + 
'waveforms-summary.csv' +] + +for file in files: + !wget -O {OUTPUT_DIR}/{file} {MIMICIV_RAW_DIR}/{file} + !wget -O {MIMICIV_PRE_MEDS_DIR}/{file} {MIMICIV_RAW_DIR}/{file} + +# %% +# Convert to MEDS +CURRENT_DIR = !pwd +CURRENT_DIR = CURRENT_DIR[0] + +# %% +# Convert to MEDS +TUTORIAL_DIR = CURRENT_DIR + "/content/MIMIC-IV_Example" +MIMICIV_RAW_DIR = CURRENT_DIR + "/content/download/mimic-iv-demo/2.2" +MIMICIV_PRE_MEDS_DIR = CURRENT_DIR + "/content/pre_meds" +MIMICIV_MEDS_DIR = CURRENT_DIR + "/content/meds" + +EVENT_CONVERSION_CONFIG_FP = CURRENT_DIR + "/content/MIMIC-IV_Example/configs/event_config.yaml" +PIPELINE_CONFIG_PATH = CURRENT_DIR + "/content/MIMIC-IV_Example/configs/pipeline_config.yaml" +!cd {TUTORIAL_DIR} && ./run.sh {MIMICIV_RAW_DIR} {MIMICIV_PRE_MEDS_DIR} {MIMICIV_MEDS_DIR} do_unzip=true + +# %% [markdown] +# # Examine MEDS data + +# %% +import polars as pl + +data = pl.read_parquet('./content/meds/data/**/*.parquet') +data[['subject_id', 'time', 'code', 'numeric_value']] + +# %% [markdown] +# # A simple Polars analysis + +# %% +icd10_events = data.filter(pl.col('code').str.starts_with('DIAGNOSIS//ICD//10//')) +icd10_events.group_by('code').count().sort('count', descending=True) + +# %% +df = pl.read_parquet("./content/meds/metadata/codes.parquet") +df + +# %% [markdown] +# ## Using an example MEDS tool, ACES for labeling + +# %% [markdown] +# ## Install ACES + +# %% +!pip install es-aces + +# %% + +# From ACES documentation +task_config = """ +description: >- + This file specifies the base configuration for the prediction of a hospital los being greater than 3days, + leveraging only the first 48 hours of data after admission, with a 24 hour gap between the input window + and the target window. Patients who die or are discharged in the gap window are excluded. Note that this + task is in-**hospital** los, not in-**ICU** los which is a different task. 
+ +predicates: + hospital_admission: + code: {regex: "HOSPITAL_ADMISSION//.*"} + hospital_discharge: + code: {regex: "HOSPITAL_DISCHARGE//.*"} + death: + code: MEDS_DEATH + discharge_or_death: + expr: or(hospital_discharge, death) + +trigger: hospital_admission + +windows: + input: + start: NULL + end: trigger + 48h + start_inclusive: True + end_inclusive: True + index_timestamp: end + gap: + start: input.end + end: start + 24h + start_inclusive: False + end_inclusive: True + has: + hospital_admission: (None, 0) + discharge_or_death: (None, 0) + target: + start: trigger + end: start + 3d + start_inclusive: False + end_inclusive: True + label: discharge_or_death +""" + +!mkdir ./content/tasks/ -p +TASK_NAME = "in_hospital_3d_los_after_48h" +TASK_CONFIG_FP = f"./content/tasks/{TASK_NAME}.yaml" +with open(TASK_CONFIG_FP, 'w') as f: + f.write(task_config) + + +# %% +!pip install es-aces + +# %% +!echo $TASK_NAME +!echo $TASK_CONFIG_FP + +# %% +!aces-cli --multirun data=sharded data.standard=meds data.root="$MIMICIV_MEDS_DIR/data" "data.shard=$(expand_shards ./content/meds/data/)" cohort_dir=" ./content/tasks" cohort_name="$TASK_NAME" config_path="$TASK_CONFIG_FP" + +# %% +# TODO: reimporting polars due to dependencies? 
+import polars as pl + +# Execute query and get results +df = pl.read_parquet(f"./content/tasks/{TASK_NAME}/**/*.parquet") + +print("train prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/train/*.parquet")['boolean_value'].mean(), 3))) +print("tuning prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/tuning/*.parquet")['boolean_value'].mean(), 3))) +print("held_out prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/held_out/*.parquet")['boolean_value'].mean(), 3))) + + +df.sort('boolean_value') + +# %% [markdown] +# ## Switch Colab to python 3.11 for cehrbert +# %% +# %%capture +# !sudo apt-get install python3.11 python3.11-venv +# import sys +# !python3.11 -m venv cehrbert +# import os +# os.environ['PATH'] = './content/cehrbert/bin:' + os.environ['PATH'] +# !pip install --upgrade pip + +# %% +# import sys +# sys.executable = './content/cehrbert/bin/python' + +# %% [markdown] +# ## Install cehrbert and its dependencies + +# %% +!pip install meds_reader==0.1.9 +!pip install setuptools +!pip install cehrbert==1.3.1 + +# %% +MIMICIV_MEDS_DIR = "./content/meds/" +MIMICIV_MEDS_READER_DIR = "./content/meds_reader/" +TASK_DIR="./content/tasks/" +TASK_NAME="in_hospital_3d_los_after_48h" +OUTPUT_PRETRAIN_MODEL_DIR="./content/output/cehrbert/" +# TODO this variable has an identical name? 
+OUTPUT_PRETRAIN_MODEL_DIR="./content/output/cehrbert_finetuned/" + +# %% [markdown] +# Run meds_reader on the MEDS data + +# %% +!meds_reader_convert $MIMICIV_MEDS_DIR $MIMICIV_MEDS_READER_DIR + +# %% +!mkdir -p ./content/output/cehrbert/ +!mkdir -p ./content/output/cehrbert_dataset_prepared/ +!mkdir -p ./content/output/cehrbert_finetuned/ + +# %% +# !mkdir ./content/github_repo;cd ./content/github_repo;git clone https://github.com/cumc-dbmi/cehrbert.git;cd cehrbert;git checkout fix/meds_evaluation;pip install .; + +# %% [markdown] +# Create the cehrbert pretraining configuration yaml file + +# %% +cehrbert_pretrain_config = """ +#Model arguments +model_name_or_path: "./content/output/cehrbert/" +tokenizer_name_or_path: "./content/output/cehrbert/" +num_hidden_layers: 6 +max_position_embeddings: 1024 +hidden_size: 768 +vocab_size: 100000 +min_frequency: 50 +include_value_prediction: false # additional CEHR-BERT learning objective + +#Data arguments +data_folder: "./content/meds_reader/" +dataset_prepared_path: "./content/output/cehrbert_dataset_prepared/" + +# Below is a list of Med-to-CehrBert related arguments +preprocessing_num_workers: 2 +preprocessing_batch_size: 128 +# if is_data_in_med is false, it assumes the data is in the cehrbert format +is_data_in_meds: true +att_function_type: "cehr_bert" +inpatient_att_function_type: "mix" +include_auxiliary_token: true +include_demographic_prompt: false +# if the data is in the meds format, the validation split will be omitted +# as the meds already provide train/tuning/held_out splits +validation_split_percentage: 0.05 + +# Huggingface Arguments +dataloader_num_workers: 2 +dataloader_prefetch_factor: 2 + +overwrite_output_dir: false +resume_from_checkpoint: # automatically infer the latest checkpoint from the output folder +seed: 42 + +output_dir: "./content/output/cehrbert/" +evaluation_strategy: "epoch" +save_strategy: "epoch" +eval_accumulation_steps: 10 + +learning_rate: 0.00005 +per_device_train_batch_size: 8 
+per_device_eval_batch_size: 8 +gradient_accumulation_steps: 2 + +num_train_epochs: 50 # for large datasets, 5-10 epochs should suffice +warmup_steps: 10 +weight_decay: 0.01 +logging_dir: "./logs" +logging_steps: 10 + +save_total_limit: +load_best_model_at_end: true +metric_for_best_model: "eval_loss" +greater_is_better: false + +report_to: "none" +""" +PRETRAIN_CONFIG_FP = f"./content/output/cehrbert/cehrbert_pretrain_config.yaml" +with open(PRETRAIN_CONFIG_FP, 'w') as f: + f.write(cehrbert_pretrain_config) + +# %% [markdown] +# ## Pretrain cehrbert using MLM +!python3.11 -m cehrbert.runners.hf_cehrbert_pretrain_runner ./content/output/cehrbert/cehrbert_pretrain_config.yaml + +# %% [markdown] +# ## Create the cehrbert finetuning configuration yaml file +cehrbert_finetune_config = f""" +#Model arguments +model_name_or_path: "./content/output/cehrbert/" +tokenizer_name_or_path: "./content/output/cehrbert/" +num_hidden_layers: 6 +max_position_embeddings: 1024 +hidden_size: 768 +vocab_size: 100000 +min_frequency: 50 +include_value_prediction: false # additional CEHR-BERT learning objective + +#Data arguments +cohort_folder: "./content/tasks/{TASK_NAME}/" +data_folder: "./content/meds_reader/" +dataset_prepared_path: "./content/output/cehrbert_dataset_prepared/" + +#LORA +use_lora: True +lora_rank: 64 +lora_alpha: 16 +target_modules: [ "query", "value" ] +lora_dropout: 0.1 + +# Below is a list of Med-to-CehrBert related arguments +preprocessing_num_workers: 2 +preprocessing_batch_size: 128 +# if is_data_in_med is false, it assumes the data is in the cehrbert format +is_data_in_meds: true +att_function_type: "cehr_bert" +inpatient_att_function_type: "mix" +include_auxiliary_token: true +include_demographic_prompt: false +# if the data is in the meds format, the validation split will be omitted +# as the meds already provide train/tuning/held_out splits +validation_split_percentage: 0.05 + +# Huggingface Arguments +dataloader_num_workers: 2 +dataloader_prefetch_factor: 2 
+ +overwrite_output_dir: false +resume_from_checkpoint: # automatically infer the latest checkpoint from the output folder +seed: 42 + +output_dir: "./content/output/cehrbert_finetuned" +evaluation_strategy: "epoch" +save_strategy: "epoch" +eval_accumulation_steps: 10 + +do_train: True +do_predict: True + +learning_rate: 0.00005 +per_device_train_batch_size: 8 +per_device_eval_batch_size: 8 +gradient_accumulation_steps: 2 + +num_train_epochs: 10 +warmup_steps: 10 +weight_decay: 0.01 +logging_dir: "./logs" +logging_steps: 10 + +save_total_limit: +load_best_model_at_end: true +metric_for_best_model: "eval_loss" +greater_is_better: false + +report_to: "none" +""" +FINETUNE_CONFIG_FP = f"./content/output/cehrbert/cehrbert_finetune_config.yaml" +with open(FINETUNE_CONFIG_FP, 'w') as f: + f.write(cehrbert_finetune_config) + +# %% +# ## Finetune cehrbert for the downstream task +!python3.11 -m cehrbert.runners.hf_cehrbert_finetune_runner ./content/output/cehrbert/cehrbert_finetune_config.yaml + +# %% +import pandas as pd + +pd.read_parquet("./content/output/cehrbert_finetuned/test_predictions") + +# %% +!cat ./content/output/cehrbert_finetuned/test_results.json diff --git a/demo/meds_tab.ipynb b/demo/meds_tab.ipynb new file mode 100644 index 0000000..cde4110 --- /dev/null +++ b/demo/meds_tab.ipynb @@ -0,0 +1,285 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "PZmYRVX2W8m7" + }, + "source": [ + "# Using an example MEDS tool, ACES for labeling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "id": "z3_pG9YAWpKy", + "outputId": "efa4c286-413d-4a91-a53d-fb41769cd4f2" + }, + "outputs": [], + "source": [ + "#@title Install ACES\n", + "\n", + "\n", + "!pip install es-aces" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H6fqe217XDhi" + }, + "outputs": [], + "source": [ + "# From the ACES 
documentation\n", + "\n", + "task_config = \"\"\"\n", + "description: >-\n", + " This file specifies the base configuration for the prediction of a hospital los being greater than 3days,\n", + " leveraging only the first 48 hours of data after admission, with a 24 hour gap between the input window\n", + " and the target window. Patients who die or are discharged in the gap window are excluded. Note that this\n", + " task is in-**hospital** los, not in-**ICU** los which is a different task.\n", + "\n", + "predicates:\n", + " hospital_admission:\n", + " code: {regex: \"HOSPITAL_ADMISSION//.*\"}\n", + " hospital_discharge:\n", + " code: {regex: \"HOSPITAL_DISCHARGE//.*\"}\n", + " death:\n", + " code: MEDS_DEATH\n", + " discharge_or_death:\n", + " expr: or(hospital_discharge, death)\n", + "\n", + "trigger: hospital_admission\n", + "\n", + "windows:\n", + " input:\n", + " start: NULL\n", + " end: trigger + 48h\n", + " start_inclusive: True\n", + " end_inclusive: True\n", + " index_timestamp: end\n", + " gap:\n", + " start: input.end\n", + " end: start + 24h\n", + " start_inclusive: False\n", + " end_inclusive: True\n", + " has:\n", + " hospital_admission: (None, 0)\n", + " discharge_or_death: (None, 0)\n", + " target:\n", + " start: trigger\n", + " end: start + 3d\n", + " start_inclusive: False\n", + " end_inclusive: True\n", + " label: discharge_or_death\n", + "\"\"\"\n", + "!mkdir /content/tasks/ -p\n", + "TASK_NAME = \"in_hospital_3d_los_after_48h\"\n", + "TASK_CONFIG_FP = f\"/content/tasks/{TASK_NAME}.yaml\"\n", + "with open(TASK_CONFIG_FP, 'w') as f:\n", + " f.write(task_config)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "es-39eHOyp5a", + "outputId": "9d5e2468-fdd5-4c4b-8615-fe24f5a9310f" + }, + "outputs": [], + "source": [ + "!pip install es-aces" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "bXLiJGEry-Gb", + "outputId": "7d954ab4-cf5c-4d02-a99c-669b5822bf44" + }, + "outputs": [], + "source": [ + "!aces-cli --multirun data=sharded data.standard=meds data.root=\"$MIMICIV_MEDS_DIR/data\" \"data.shard=$(expand_shards /content/meds/data/)\" cohort_dir=\" /content/tasks\" cohort_name=\"$TASK_NAME\" config_path=\"$TASK_CONFIG_FP\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 497 + }, + "id": "7Vvac7DIWyRT", + "outputId": "40493f0e-48ba-4f5e-9d9a-401e26f1a9b7" + }, + "outputs": [], + "source": [ + "import polars as pl\n", + "\n", + "# execute query and get results\n", + "df = pl.read_parquet(f\"/content/tasks/{TASK_NAME}/**/*.parquet\")\n", + "\n", + "print(\"train prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/train/*.parquet\")['boolean_value'].mean(), 3)))\n", + "print(\"tuning prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/tuning/*.parquet\")['boolean_value'].mean(), 3)))\n", + "print(\"held_out prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/held_out/*.parquet\")['boolean_value'].mean(), 3)))\n", + "\n", + "\n", + "df.sort('boolean_value')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "XWB7O1UGhRIo", + "outputId": "e3416d5e-7427-4cf4-c0ab-20053a9d3430" + }, + "outputs": [], + "source": [ + "#@title Install meds-tab\n", + "\n", + "!pip uninstall es-aces -y\n", + "!pip install meds-tab" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SeGawIqli0nn" + }, + "outputs": [], + "source": [ + "MIMICIV_MEDS_DIR = \"/content/meds/\"\n", + "OUTPUT_TABULARIZATION_DIR=\"/content/tabularized/\"\n", + "TASK_DIR=\"/content/tasks/\"\n", + "TASK_NAME=\"in_hospital_3d_los_after_48h\"\n", + 
"OUTPUT_MODEL_DIR=\"/content/output/meds_tab/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Tud0_0cgjljP", + "outputId": "fb3417e0-3ba4-4f9a-ab95-ce3ba8731ca1" + }, + "outputs": [], + "source": [ + "!meds-tab-describe input_dir={MIMICIV_MEDS_DIR}/data output_dir={OUTPUT_TABULARIZATION_DIR}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RVLBdOn1mnV5" + }, + "outputs": [], + "source": [ + "# Define the window sizes and aggregations to generate features for\n", + "WINDOW_SIZES = \"tabularization.window_sizes=[1d,30d,365d]\"\n", + "AGGREGATIONS = \"tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KhCPqBmduNYK" + }, + "outputs": [], + "source": [ + "!rm -rf /content/tabularized/tabularize/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p_D07KzxjVUl", + "outputId": "8836b076-cf64-4f29-da81-ac5125ab7608" + }, + "outputs": [], + "source": [ + "!meds-tab-tabularize-static \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "u-e-mV2Hk-Qf", + "outputId": "c292be12-ff74-44e4-f039-758e10ccc909" + }, + "outputs": [], + "source": [ + "!meds-tab-tabularize-time-series --multirun \"worker=range(0,2)\" \"hydra/launcher=joblib\" \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NmaR_-Fik4eH" + }, + "outputs": [], + 
"source": [ + "!meds-tab-cache-task \"input_dir={MIMICIV_MEDS_DIR}/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" \"input_label_dir=$TASK_DIR/$TASK_NAME/\" \"task_name=$TASK_NAME\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dLIkOzTblBB2" + }, + "outputs": [], + "source": [ + "!meds-tab-xgboost --multirun \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" \"output_model_dir=$OUTPUT_MODEL_DIR/$TASK_NAME/\" \"task_name=$TASK_NAME\" do_overwrite=False \"hydra.sweeper.n_trials=10\" $WINDOW_SIZES $AGGREGATIONS \"tabularization.min_code_inclusion_count=10\"" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/demo/meds_tab.py b/demo/meds_tab.py new file mode 100644 index 0000000..4fae3ae --- /dev/null +++ b/demo/meds_tab.py @@ -0,0 +1,240 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.16.4 +# kernelspec: +# display_name: Python 3 +# name: python3 +# --- + +# %% [Colab-only] Switch Colab to python 3.12 +# !sudo apt-get install python3.12 python3.12-venv +# import sys +# !python3.12 -m venv meds_env +# import os +# os.environ['PATH'] = '/content/meds_env/bin:' + os.environ['PATH'] +# !pip install --upgrade pip + +# # Then in a new code cell: +# import sys +# sys.executable = '/content/meds_env/bin/python' + +# # Confirm python version is 3.12 +# !python --version + +# %% +!pwd # Should be .../src/MEDS_DEV/demo + +# %% [markdown] +# ## Install dependencies + +# %% +!pip -q install meds_etl==0.3.6 meds_transforms==0.0.7 + +# TODO install meds-evaluation + +# %% [markdown] +# # Download MIMIC-IV demo + +# %% +# macOS users should install wget (e.g. 
through brew) +!wget -q -r -N -c --no-host-directories --cut-dirs=1 -np -P ./content/download https://physionet.org/files/mimic-iv-demo/2.2/ + +# %% +# Download pre-meds script, event config (defining how raw data is converted to meds data), and meds-transform config +!mkdir -p ./content/meds-transform/ +!git clone --depth 1 https://github.com/mmcdermott/MEDS_transforms.git ./content/tmp/ +!mv ./content/tmp/MIMIC-IV_Example ./content/MIMIC-IV_Example + +# %% +# Download MIMIC-IV metadata +MIMICIV_RAW_DIR = "https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map" +MIMICIV_PRE_MEDS_DIR = "./content/pre_meds/" +!mkdir {MIMICIV_PRE_MEDS_DIR} + +OUTPUT_DIR = "./content/download/mimic-iv-demo/2.2" + +files = [ + 'd_labitems_to_loinc.csv', + 'inputevents_to_rxnorm.csv', + 'lab_itemid_to_loinc.csv', + 'meas_chartevents_main.csv', + 'meas_chartevents_value.csv', + 'numerics-summary.csv', + 'outputevents_to_loinc.csv', + 'proc_datetimeevents.csv', + 'proc_itemid.csv', + 'waveforms-summary.csv' +] + +for file in files: + !wget -O {OUTPUT_DIR}/{file} {MIMICIV_RAW_DIR}/{file} + !wget -O {MIMICIV_PRE_MEDS_DIR}/{file} {MIMICIV_RAW_DIR}/{file} + +# %% +# Convert to MEDS +CURRENT_DIR = !pwd +CURRENT_DIR = CURRENT_DIR[0] +# %% +TUTORIAL_DIR = CURRENT_DIR + "/content/MIMIC-IV_Example" +MIMICIV_RAW_DIR = CURRENT_DIR + "/content/download/mimic-iv-demo/2.2" +MIMICIV_PRE_MEDS_DIR = CURRENT_DIR + "/content/pre_meds" +MIMICIV_MEDS_DIR = CURRENT_DIR + "/content/meds" + +EVENT_CONVERSION_CONFIG_FP = CURRENT_DIR + "/content/MIMIC-IV_Example/configs/event_config.yaml" +PIPELINE_CONFIG_PATH = CURRENT_DIR + "/content/MIMIC-IV_Example/configs/pipeline_config.yaml" +!cd {TUTORIAL_DIR} && ./run.sh {MIMICIV_RAW_DIR} {MIMICIV_PRE_MEDS_DIR} {MIMICIV_MEDS_DIR} do_unzip=true + +# %% [markdown] +# # Examine MEDS data + +# %% +import polars as pl + +data = pl.read_parquet('./content/meds/data/**/*.parquet') + +data[['subject_id', 'time', 'code', 'numeric_value']] + +# %% 
[markdown] +# # A simple Polars analysis + +# %% +icd10_events = data.filter(pl.col('code').str.starts_with('DIAGNOSIS//ICD//10//')) +icd10_events.group_by('code').count().sort('count', descending=True) + +# %% +df = pl.read_parquet("./content/meds/metadata/codes.parquet") +df + +# %% [markdown] +# ## Using an example MEDS tool, ACES for labeling + +# %% [markdown] +# ## Install ACES + +# %% +!pip install es-aces + +# %% + +# From ACES documentation +task_config = """ +description: >- + This file specifies the base configuration for the prediction of a hospital los being greater than 3days, + leveraging only the first 48 hours of data after admission, with a 24 hour gap between the input window + and the target window. Patients who die or are discharged in the gap window are excluded. Note that this + task is in-**hospital** los, not in-**ICU** los which is a different task. + +predicates: + hospital_admission: + code: {regex: "HOSPITAL_ADMISSION//.*"} + hospital_discharge: + code: {regex: "HOSPITAL_DISCHARGE//.*"} + death: + code: MEDS_DEATH + discharge_or_death: + expr: or(hospital_discharge, death) + +trigger: hospital_admission + +windows: + input: + start: NULL + end: trigger + 48h + start_inclusive: True + end_inclusive: True + index_timestamp: end + gap: + start: input.end + end: start + 24h + start_inclusive: False + end_inclusive: True + has: + hospital_admission: (None, 0) + discharge_or_death: (None, 0) + target: + start: trigger + end: start + 3d + start_inclusive: False + end_inclusive: True + label: discharge_or_death +""" + +!mkdir ./content/tasks/ -p +TASK_NAME = "in_hospital_3d_los_after_48h" +TASK_CONFIG_FP = f"./content/tasks/{TASK_NAME}.yaml" +with open(TASK_CONFIG_FP, 'w') as f: + f.write(task_config) + +# %% +!pip install es-aces + +# %% +!aces-cli --multirun data=sharded data.standard=meds data.root="$MIMICIV_MEDS_DIR/data" "data.shard=$(expand_shards ./content/meds/data/)" cohort_dir=" ./content/tasks" cohort_name="$TASK_NAME" 
config_path="$TASK_CONFIG_FP" + +# %% +# TODO: reimporting polars due to dependencies? +import polars as pl + +# Execute query and get results +df = pl.read_parquet(f"./content/tasks/{TASK_NAME}/**/*.parquet") + +print("train prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/train/*.parquet")['boolean_value'].mean(), 3))) +print("tuning prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/tuning/*.parquet")['boolean_value'].mean(), 3))) +print("held_out prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/held_out/*.parquet")['boolean_value'].mean(), 3))) + + +df.sort('boolean_value') + +# %% +# ## Install meds-tab + +!pip uninstall es-aces -y # TODO ??? +!pip install meds-tab + +# %% +MIMICIV_MEDS_DIR = "./content/meds/" +OUTPUT_TABULARIZATION_DIR="./content/tabularized/" +TASK_DIR="./content/tasks/" +TASK_NAME="in_hospital_3d_los_after_48h" +OUTPUT_MODEL_DIR="./content/output/meds_tab/" + +# %% +!meds-tab-describe input_dir={MIMICIV_MEDS_DIR}/data output_dir={OUTPUT_TABULARIZATION_DIR} + +# %% +# Define the window sizes and aggregations to generate features for +# TODO define this as system variables or make sure the shell +# commands can find these +WINDOW_SIZES = "tabularization.window_sizes=[1d,30d,365d]" +AGGREGATIONS = "tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]" + +# %% +!rm -rf ./content/tabularized/tabularize/ + +# %% +# TODO shell vs python variables +!echo {OUTPUT_TABULARIZATION_DIR} + +# %% +# TODO shell vs python variables +!echo WINDOW_SIZES +# %% +# TODO shell vs python variables +!meds-tab-tabularize-static "input_dir=$MIMICIV_MEDS_DIR/data" "output_dir=$OUTPUT_TABULARIZATION_DIR" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS + +# %% +# TODO shell vs python variables +!meds-tab-tabularize-time-series --multirun "worker=range(0,2)" "hydra/launcher=joblib" "input_dir=$MIMICIV_MEDS_DIR/data" "output_dir=$OUTPUT_TABULARIZATION_DIR" 
do_overwrite=False $WINDOW_SIZES $AGGREGATIONS + +# %% +# TODO shell vs python variables +!meds-tab-cache-task "input_dir={MIMICIV_MEDS_DIR}/data" "output_dir=$OUTPUT_TABULARIZATION_DIR" "input_label_dir=$TASK_DIR/$TASK_NAME/" "task_name=$TASK_NAME" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS + +# %% +# TODO shell vs python variables +!meds-tab-xgboost --multirun "input_dir=$MIMICIV_MEDS_DIR/data" "output_dir=$OUTPUT_TABULARIZATION_DIR" "output_model_dir=$OUTPUT_MODEL_DIR/$TASK_NAME/" "task_name=$TASK_NAME" do_overwrite=False "hydra.sweeper.n_trials=10" $WINDOW_SIZES $AGGREGATIONS "tabularization.min_code_inclusion_count=10" From 91d30eeb96bc8fdd76a873e53599ce96215e87c1 Mon Sep 17 00:00:00 2001 From: Nassim Oufattole Date: Sat, 14 Dec 2024 17:14:23 -0800 Subject: [PATCH 2/8] fixed e2e meds transform bug with eicu where the extraction config still had the infusionDrug table. Added ACES and meds-dev label extraction --- .gitignore | 2 + demo/aces.ipynb | 662 +++++++++++++++++++++++------ demo/configs/extract_MIMIC.yaml | 8 +- demo/extract_meds_data.ipynb | 3 +- demo/meds_tab.ipynb | 77 ---- pyproject.toml | 2 +- src/MEDS_DEV/demo/meds_cehrbert.py | 397 ----------------- src/MEDS_DEV/demo/meds_tab.ipynb | 485 --------------------- src/MEDS_DEV/demo/meds_tab.py | 240 ----------- 9 files changed, 551 insertions(+), 1325 deletions(-) delete mode 100644 src/MEDS_DEV/demo/meds_cehrbert.py delete mode 100644 src/MEDS_DEV/demo/meds_tab.ipynb delete mode 100644 src/MEDS_DEV/demo/meds_tab.py diff --git a/.gitignore b/.gitignore index c2b1661..3a41f4d 100644 --- a/.gitignore +++ b/.gitignore @@ -361,3 +361,5 @@ $RECYCLE.BIN/ meds_env/* src/MEDS_DEV/demo/download/* src/MEDS_DEV/demo/content/* + +demo/work_dir \ No newline at end of file diff --git a/demo/aces.ipynb b/demo/aces.ipynb index 8d03cbf..7b31586 100644 --- a/demo/aces.ipynb +++ b/demo/aces.ipynb @@ -30,20 +30,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - 
"outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/\n" + ] + } + ], "source": [ - "ROOT_DIR = \"\"" + "#@title Download E-ICU demo\n", + "import tempfile\n", + "import os\n", + "from pathlib import Path\n", + "notebook_dir = os.getcwd()\n", + "\n", + "# Choose MIMICIV or eicu\n", + "ROOT_DIR=f\"{notebook_dir}/work_dir/mimiciv_demo/\"\n", + "# ROOT_DIR=f\"{notebook_dir}/work_dir/eicu_demo/\"\n", + "Path(ROOT_DIR).mkdir(parents=True, exist_ok=True)\n", + "\n", + "!echo {ROOT_DIR}" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 60, "metadata": { "id": "H6fqe217XDhi" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TASK_DIR\n", + "mkdir: -p: File exists\n" + ] + } + ], "source": [ "# From the ACES documentation\n", "\n", @@ -88,16 +116,19 @@ " end_inclusive: True\n", " label: discharge_or_death\n", "\"\"\"\n", - "!mkdir /content/tasks/ -p\n", - "TASK_NAME = \"in_hospital_3d_los_after_48h\"\n", - "TASK_CONFIG_FP = f\"/content/tasks/{TASK_NAME}.yaml\"\n", + "MEDS_DIR = ROOT_DIR + \"/meds\"\n", + "TASK_DIR = MEDS_DIR + \"/task_labels\"\n", + "! 
echo TASK_DIR\n", + "TASK_NAME = \"los_in_hospital_first_48h\"\n", + "TASK_CONFIG_FP = f\"{TASK_DIR}/{TASK_NAME}.yaml\"\n", + "!mkdir {TASK_DIR}/{TASK_NAME} -p\n", "with open(TASK_CONFIG_FP, 'w') as f:\n", " f.write(task_config)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 61, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -105,14 +136,178 @@ "id": "bXLiJGEry-Gb", "outputId": "7d954ab4-cf5c-4d02-a99c-669b5822bf44" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-12-14 17:02:13,334][HYDRA] Launching 3 jobs locally\n", + "[2024-12-14 17:02:13,334][HYDRA] \t#0 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=held_out/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", + "\u001b[32m2024-12-14 17:02:13.542\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + 
"\u001b[32m2024-12-14 17:02:13.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "standard: meds\n", + "ts_format: '%m/%d/%Y %H:%M'\n", + "root: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n", + "shard: held_out/0\n", + "path: ${data.root}/${data.shard}.parquet\n", + "_prefix: /${data.shard}\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 
17:02:13.575\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "trigger\n", + "┣━━ input.end\n", + "┃ ┣━━ input.start\n", + "┃ ┗━━ gap.end\n", + "┗━━ target.end\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.580\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 4,155 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.630\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.630\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 4 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.634\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 
16 valid rows returned corresponding to 10 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.645\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", + "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", + "Columns:\n", + " - trigger\n", + " - input.end_summary\n", + " - input.start_summary\n", + " - gap.end_summary\n", + " - target.end_summary\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.233536. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/held_out/0.parquet'.\u001b[0m\n", + "[2024-12-14 17:02:13,777][HYDRA] \t#1 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=train/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", + "\u001b[32m2024-12-14 17:02:13.842\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "standard: meds\n", + "ts_format: '%m/%d/%Y %H:%M'\n", + "root: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n", + "shard: train/0\n", + "path: ${data.root}/${data.shard}.parquet\n", + "_prefix: /${data.shard}\n", + "\u001b[0m\n", 
+ "\u001b[32m2024-12-14 17:02:13.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. 
Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "trigger\n", + "┣━━ input.end\n", + "┃ ┣━━ input.start\n", + "┃ ┗━━ gap.end\n", + "┗━━ target.end\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 72,649 rows as they failed to satisfy '1 <= 
hospital_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:13.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 26 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 
212 valid rows returned corresponding to 75 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.240\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", + "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", + "Columns:\n", + " - trigger\n", + " - input.end_summary\n", + " - input.start_summary\n", + " - gap.end_summary\n", + " - target.end_summary\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.399837. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/train/0.parquet'.\u001b[0m\n", + "[2024-12-14 17:02:14,243][HYDRA] \t#2 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=tuning/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", + "\u001b[32m2024-12-14 17:02:14.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "standard: meds\n", + "ts_format: '%m/%d/%Y %H:%M'\n", + "root: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n", + "shard: tuning/0\n", + "path: ${data.root}/${data.shard}.parquet\n", + "_prefix: /${data.shard}\n", + "\u001b[0m\n", + 
"\u001b[32m2024-12-14 17:02:14.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. 
Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "trigger\n", + "┣━━ input.end\n", + "┃ ┣━━ input.start\n", + "┃ ┗━━ gap.end\n", + "┗━━ target.end\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 6,240 rows as they failed to satisfy '1 <= 
hospital_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.331\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 2 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 
15 valid rows returned corresponding to 9 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.361\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", + "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", + "Columns:\n", + " - trigger\n", + " - input.end_summary\n", + " - input.start_summary\n", + " - gap.end_summary\n", + " - target.end_summary\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:14.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.054431. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/tuning/0.parquet'.\u001b[0m\n" + ] + } + ], "source": [ - "!aces-cli --multirun data=sharded data.standard=meds data.root=\"$MIMICIV_MEDS_DIR/data\" \"data.shard=$(expand_shards /content/meds/data/)\" cohort_dir=\" /content/tasks\" cohort_name=\"$TASK_NAME\" config_path=\"$TASK_CONFIG_FP\"" + "!aces-cli --multirun data=sharded data.standard=meds data.root={MEDS_DIR}/data data.shard=$(expand_shards {MEDS_DIR}/data/) cohort_dir={TASK_DIR} cohort_name={TASK_NAME} config_path={TASK_CONFIG_FP}" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 62, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -121,145 +316,364 @@ "id": "7Vvac7DIWyRT", "outputId": "40493f0e-48ba-4f5e-9d9a-401e26f1a9b7" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "train prevalence: 0.231\n", + "tuning prevalence: 0.133\n", + "held_out prevalence: 0.25\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "shape: (243, 6)
subject_idprediction_timeboolean_valueinteger_valuefloat_valuecategorical_value
i64datetime[μs]booli64f64str
100128532175-04-07 15:36:00falsenullnullnull
100128532176-11-27 21:28:00falsenullnullnull
100147292125-03-01 07:15:00falsenullnullnull
100147292125-03-21 16:58:00falsenullnullnull
100167422178-07-05 21:13:00falsenullnullnull
100399972135-11-09 02:42:00truenullnullnull
100400252143-03-20 12:34:00truenullnullnull
100400252145-07-05 23:46:00truenullnullnull
100207402150-09-17 14:09:00truenullnullnull
100207402151-01-17 15:25:00truenullnullnull
" + ], + "text/plain": [ + "shape: (243, 6)\n", + "┌────────────┬───────────────────┬───────────────┬───────────────┬─────────────┬───────────────────┐\n", + "│ subject_id ┆ prediction_time ┆ boolean_value ┆ integer_value ┆ float_value ┆ categorical_value │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ datetime[μs] ┆ bool ┆ i64 ┆ f64 ┆ str │\n", + "╞════════════╪═══════════════════╪═══════════════╪═══════════════╪═════════════╪═══════════════════╡\n", + "│ 10012853 ┆ 2175-04-07 ┆ false ┆ null ┆ null ┆ null │\n", + "│ ┆ 15:36:00 ┆ ┆ ┆ ┆ │\n", + "│ 10012853 ┆ 2176-11-27 ┆ false ┆ null ┆ null ┆ null │\n", + "│ ┆ 21:28:00 ┆ ┆ ┆ ┆ │\n", + "│ 10014729 ┆ 2125-03-01 ┆ false ┆ null ┆ null ┆ null │\n", + "│ ┆ 07:15:00 ┆ ┆ ┆ ┆ │\n", + "│ 10014729 ┆ 2125-03-21 ┆ false ┆ null ┆ null ┆ null │\n", + "│ ┆ 16:58:00 ┆ ┆ ┆ ┆ │\n", + "│ 10016742 ┆ 2178-07-05 ┆ false ┆ null ┆ null ┆ null │\n", + "│ ┆ 21:13:00 ┆ ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 10039997 ┆ 2135-11-09 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 02:42:00 ┆ ┆ ┆ ┆ │\n", + "│ 10040025 ┆ 2143-03-20 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 12:34:00 ┆ ┆ ┆ ┆ │\n", + "│ 10040025 ┆ 2145-07-05 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 23:46:00 ┆ ┆ ┆ ┆ │\n", + "│ 10020740 ┆ 2150-09-17 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 14:09:00 ┆ ┆ ┆ ┆ │\n", + "│ 10020740 ┆ 2151-01-17 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 15:25:00 ┆ ┆ ┆ ┆ │\n", + "└────────────┴───────────────────┴───────────────┴───────────────┴─────────────┴───────────────────┘" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import polars as pl\n", "\n", "# execute query and get results\n", - "df = pl.read_parquet(f\"/content/tasks/{TASK_NAME}/**/*.parquet\")\n", + "df = pl.read_parquet(f\"{TASK_DIR}/{TASK_NAME}/**/*.parquet\")\n", "\n", - "print(\"train prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/train/*.parquet\")['boolean_value'].mean(), 3)))\n", - 
"print(\"tuning prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/tuning/*.parquet\")['boolean_value'].mean(), 3)))\n", - "print(\"held_out prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/held_out/*.parquet\")['boolean_value'].mean(), 3)))\n", + "print(\"train prevalence: \" + str(round(pl.read_parquet(f\"{TASK_DIR}/{TASK_NAME}/train/*.parquet\")['boolean_value'].mean(), 3)))\n", + "print(\"tuning prevalence: \" + str(round(pl.read_parquet(f\"{TASK_DIR}/{TASK_NAME}/tuning/*.parquet\")['boolean_value'].mean(), 3)))\n", + "print(\"held_out prevalence: \" + str(round(pl.read_parquet(f\"{TASK_DIR}/{TASK_NAME}/held_out/*.parquet\")['boolean_value'].mean(), 3)))\n", "\n", "\n", "df.sort('boolean_value')" ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "XWB7O1UGhRIo", - "outputId": "e3416d5e-7427-4cf4-c0ab-20053a9d3430" - }, - "outputs": [], - "source": [ - "#@title Install meds-tab\n", - "\n", - "!pip uninstall es-aces -y\n", - "!pip install meds-tab" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "SeGawIqli0nn" - }, - "outputs": [], - "source": [ - "MIMICIV_MEDS_DIR = \"/content/meds/\"\n", - "OUTPUT_TABULARIZATION_DIR=\"/content/tabularized/\"\n", - "TASK_DIR=\"/content/tasks/\"\n", - "TASK_NAME=\"in_hospital_3d_los_after_48h\"\n", - "OUTPUT_MODEL_DIR=\"/content/output/meds_tab/\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Tud0_0cgjljP", - "outputId": "fb3417e0-3ba4-4f9a-ab95-ce3ba8731ca1" - }, - "outputs": [], - "source": [ - "!meds-tab-describe input_dir={MIMICIV_MEDS_DIR}/data output_dir={OUTPUT_TABULARIZATION_DIR}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RVLBdOn1mnV5" - }, - "outputs": [], + "cell_type": "markdown", + 
"metadata": {}, "source": [ - "# Define the window sizes and aggregations to generate features for\n", - "WINDOW_SIZES = \"tabularization.window_sizes=[1d,30d,365d]\"\n", - "AGGREGATIONS = \"tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]\"" + "### MEDS-DEV Has tons of pre-defined tasks we can use!!!" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KhCPqBmduNYK" - }, - "outputs": [], + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running task mortality/in_icu/first_24h on dataset MIMIC-IV with MEDS_ROOT_DIR=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds and SHARDS=held_out/0,train/0,tuning/0\n", + "[2024-12-14 17:02:21,042][HYDRA] Launching 3 jobs locally\n", + "[2024-12-14 17:02:21,042][HYDRA] \t#0 : data.shard=held_out/0\n", + "\u001b[32m2024-12-14 17:02:21.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.190\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.207\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "standard: meds\n", + "ts_format: '%m/%d/%Y %H:%M'\n", + "root: ${oc.env:MEDS_ROOT_DIR}/data\n", + "shard: held_out/0\n", + "path: ${data.root}/${data.shard}.parquet\n", + "_prefix: /${data.shard}\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - 
\u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "trigger\n", + "┣━━ input.end\n", + "┃ ┗━━ input.start\n", + "┗━━ gap.end\n", + " ┗━━ target.end\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger 
event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 4,163 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.255\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 6 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 
6 valid rows returned corresponding to 4 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.303\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m142\u001b[0m - \u001b[33m\u001b[1mAll labels in the extracted cohort are the same: '0'. This may indicate an issue with the task logic. Please double-check your configuration file if this is not expected.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.306\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", + "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", + "Columns:\n", + " - trigger\n", + " - input.end_summary\n", + " - input.start_summary\n", + " - gap.end_summary\n", + " - target.end_summary\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.209584. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/held_out/0.parquet'.\u001b[0m\n", + "[2024-12-14 17:02:21,399][HYDRA] \t#1 : data.shard=train/0\n", + "\u001b[32m2024-12-14 17:02:21.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "standard: meds\n", + "ts_format: '%m/%d/%Y %H:%M'\n", + "root: ${oc.env:MEDS_ROOT_DIR}/data\n", + "shard: train/0\n", + "path: ${data.root}/${data.shard}.parquet\n", + "_prefix: /${data.shard}\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.487\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.564\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. 
Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "trigger\n", + "┣━━ input.end\n", + "┃ ┗━━ input.start\n", + "┗━━ gap.end\n", + " ┗━━ target.end\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 72,774 rows as they failed to satisfy '1 <= icu_admission 
<= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.569\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.680\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 2 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.715\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 53 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.715\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.804\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 
60 valid rows returned corresponding to 47 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.804\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.805\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.806\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", + "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", + "Columns:\n", + " - trigger\n", + " - input.end_summary\n", + " - input.start_summary\n", + " - gap.end_summary\n", + " - target.end_summary\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.340355. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/train/0.parquet'.\u001b[0m\n", + "[2024-12-14 17:02:21,810][HYDRA] \t#2 : data.shard=tuning/0\n", + "\u001b[32m2024-12-14 17:02:21.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "standard: meds\n", + "ts_format: '%m/%d/%Y %H:%M'\n", + "root: ${oc.env:MEDS_ROOT_DIR}/data\n", + "shard: tuning/0\n", + "path: ${data.root}/${data.shard}.parquet\n", + "_prefix: /${data.shard}\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.896\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. 
Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "trigger\n", + "┣━━ input.end\n", + "┃ ┗━━ input.start\n", + "┗━━ gap.end\n", + " ┗━━ target.end\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 6,242 rows as they failed to satisfy '1 <= icu_admission <= 
None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 7 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.932\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 
8 valid rows returned corresponding to 5 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.946\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", + "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", + "Columns:\n", + " - trigger\n", + " - input.end_summary\n", + " - input.start_summary\n", + " - gap.end_summary\n", + " - target.end_summary\u001b[0m\n", + "\u001b[32m2024-12-14 17:02:21.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.071263. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/tuning/0.parquet'.\u001b[0m\n" + ] + } + ], "source": [ - "!rm -rf /content/tabularized/tabularize/" + "TASK_NAME=\"mortality/in_icu/first_24h\"\n", + "!../src/MEDS_DEV/helpers/extract_task.sh {MEDS_DIR} \"MIMIC-IV\" {TASK_NAME}" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "p_D07KzxjVUl", - "outputId": "8836b076-cf64-4f29-da81-ac5125ab7608" - }, - "outputs": [], + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/**/*.parquet\n" + ] + } + ], "source": [ - "!meds-tab-tabularize-static \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS" + "!echo \"{TASK_DIR}/{TASK_NAME}/**/*.parquet\"" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "train prevalence: 0.133\n", + "tuning prevalence: 0.125\n", + "held_out prevalence: 0.0\n" + ] }, - "id": "u-e-mV2Hk-Qf", - "outputId": "c292be12-ff74-44e4-f039-758e10ccc909" - }, - "outputs": [], + { + "data": { + "text/html": [ + "
\n", + "shape: (74, 6)
subject_idprediction_timeboolean_valueinteger_valuefloat_valuecategorical_value
i64datetime[μs]booli64f64str
100128532176-11-27 02:34:49falsenullnullnull
100147292125-02-28 10:03:08falsenullnullnull
100167422178-07-04 22:45:00falsenullnullnull
100167422178-07-14 08:16:00falsenullnullnull
100167422178-07-23 08:19:00falsenullnullnull
100104712155-12-03 20:33:00truenullnullnull
100159312177-03-25 21:48:07truenullnullnull
100378612117-03-15 16:34:58truenullnullnull
100379752185-01-18 19:12:12truenullnullnull
100380812115-10-10 10:15:25truenullnullnull
" + ], + "text/plain": [ + "shape: (74, 6)\n", + "┌────────────┬───────────────────┬───────────────┬───────────────┬─────────────┬───────────────────┐\n", + "│ subject_id ┆ prediction_time ┆ boolean_value ┆ integer_value ┆ float_value ┆ categorical_value │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ datetime[μs] ┆ bool ┆ i64 ┆ f64 ┆ str │\n", + "╞════════════╪═══════════════════╪═══════════════╪═══════════════╪═════════════╪═══════════════════╡\n", + "│ 10012853 ┆ 2176-11-27 ┆ false ┆ null ┆ null ┆ null │\n", + "│ ┆ 02:34:49 ┆ ┆ ┆ ┆ │\n", + "│ 10014729 ┆ 2125-02-28 ┆ false ┆ null ┆ null ┆ null │\n", + "│ ┆ 10:03:08 ┆ ┆ ┆ ┆ │\n", + "│ 10016742 ┆ 2178-07-04 ┆ false ┆ null ┆ null ┆ null │\n", + "│ ┆ 22:45:00 ┆ ┆ ┆ ┆ │\n", + "│ 10016742 ┆ 2178-07-14 ┆ false ┆ null ┆ null ┆ null │\n", + "│ ┆ 08:16:00 ┆ ┆ ┆ ┆ │\n", + "│ 10016742 ┆ 2178-07-23 ┆ false ┆ null ┆ null ┆ null │\n", + "│ ┆ 08:19:00 ┆ ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 10010471 ┆ 2155-12-03 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 20:33:00 ┆ ┆ ┆ ┆ │\n", + "│ 10015931 ┆ 2177-03-25 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 21:48:07 ┆ ┆ ┆ ┆ │\n", + "│ 10037861 ┆ 2117-03-15 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 16:34:58 ┆ ┆ ┆ ┆ │\n", + "│ 10037975 ┆ 2185-01-18 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 19:12:12 ┆ ┆ ┆ ┆ │\n", + "│ 10038081 ┆ 2115-10-10 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 10:15:25 ┆ ┆ ┆ ┆ │\n", + "└────────────┴───────────────────┴───────────────┴───────────────┴─────────────┴───────────────────┘" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "!meds-tab-tabularize-time-series --multirun \"worker=range(0,2)\" \"hydra/launcher=joblib\" \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NmaR_-Fik4eH" - }, - "outputs": [], - "source": [ - 
"!meds-tab-cache-task \"input_dir={MIMICIV_MEDS_DIR}/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" \"input_label_dir=$TASK_DIR/$TASK_NAME/\" \"task_name=$TASK_NAME\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS" + "import polars as pl\n", + "\n", + "\n", + "# execute query and get results\n", + "df = pl.read_parquet(f\"{TASK_DIR}/{TASK_NAME}/**/*.parquet\")\n", + "\n", + "print(\"train prevalence: \" + str(round(pl.read_parquet(f\"{TASK_DIR}/{TASK_NAME}/train/*.parquet\")['boolean_value'].mean(), 3)))\n", + "print(\"tuning prevalence: \" + str(round(pl.read_parquet(f\"{TASK_DIR}/{TASK_NAME}/tuning/*.parquet\")['boolean_value'].mean(), 3)))\n", + "print(\"held_out prevalence: \" + str(round(pl.read_parquet(f\"{TASK_DIR}/{TASK_NAME}/held_out/*.parquet\")['boolean_value'].mean(), 3)))\n", + "\n", + "\n", + "df.sort('boolean_value')" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "id": "dLIkOzTblBB2" - }, + "metadata": {}, "outputs": [], - "source": [ - "!meds-tab-xgboost --multirun \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" \"output_model_dir=$OUTPUT_MODEL_DIR/$TASK_NAME/\" \"task_name=$TASK_NAME\" do_overwrite=False \"hydra.sweeper.n_trials=10\" $WINDOW_SIZES $AGGREGATIONS \"tabularization.min_code_inclusion_count=10\"" - ] + "source": [] } ], "metadata": { @@ -267,11 +681,21 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": "dev", + "language": "python", "name": "python3" }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" } }, "nbformat": 4, diff --git a/demo/configs/extract_MIMIC.yaml b/demo/configs/extract_MIMIC.yaml index 53577f5..4002089 100644 --- a/demo/configs/extract_MIMIC.yaml +++ b/demo/configs/extract_MIMIC.yaml @@ -27,10 +27,10 @@ 
stage_configs: infer_schema_length: 999999999 split_and_shard_subjects: n_subjects_per_shard: 1000 - split_fracs: - train: 0.5 - tuning: 0.25 - held_out: 0.25 + split_fracs: + train: 0.5 + tuning: 0.25 + held_out: 0.25 stages: - shard_events diff --git a/demo/extract_meds_data.ipynb b/demo/extract_meds_data.ipynb index 55a228f..f6c55b6 100644 --- a/demo/extract_meds_data.ipynb +++ b/demo/extract_meds_data.ipynb @@ -3619,7 +3619,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3635,7 +3635,6 @@ "import tempfile\n", "import os\n", "from pathlib import Path\n", - "temp_dir = tempfile.TemporaryDirectory()\n", "notebook_dir = os.getcwd()\n", "\n", "ROOT_DIR=f\"{notebook_dir}/work_dir/eicu_demo/\"\n", diff --git a/demo/meds_tab.ipynb b/demo/meds_tab.ipynb index cde4110..20de31f 100644 --- a/demo/meds_tab.ipynb +++ b/demo/meds_tab.ipynb @@ -9,83 +9,6 @@ "# Using an example MEDS tool, ACES for labeling" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "collapsed": true, - "id": "z3_pG9YAWpKy", - "outputId": "efa4c286-413d-4a91-a53d-fb41769cd4f2" - }, - "outputs": [], - "source": [ - "#@title Install ACES\n", - "\n", - "\n", - "!pip install es-aces" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "H6fqe217XDhi" - }, - "outputs": [], - "source": [ - "# From the ACES documentation\n", - "\n", - "task_config = \"\"\"\n", - "description: >-\n", - " This file specifies the base configuration for the prediction of a hospital los being greater than 3days,\n", - " leveraging only the first 48 hours of data after admission, with a 24 hour gap between the input window\n", - " and the target window. Patients who die or are discharged in the gap window are excluded. 
Note that this\n", - " task is in-**hospital** los, not in-**ICU** los which is a different task.\n", - "\n", - "predicates:\n", - " hospital_admission:\n", - " code: {regex: \"HOSPITAL_ADMISSION//.*\"}\n", - " hospital_discharge:\n", - " code: {regex: \"HOSPITAL_DISCHARGE//.*\"}\n", - " death:\n", - " code: MEDS_DEATH\n", - " discharge_or_death:\n", - " expr: or(hospital_discharge, death)\n", - "\n", - "trigger: hospital_admission\n", - "\n", - "windows:\n", - " input:\n", - " start: NULL\n", - " end: trigger + 48h\n", - " start_inclusive: True\n", - " end_inclusive: True\n", - " index_timestamp: end\n", - " gap:\n", - " start: input.end\n", - " end: start + 24h\n", - " start_inclusive: False\n", - " end_inclusive: True\n", - " has:\n", - " hospital_admission: (None, 0)\n", - " discharge_or_death: (None, 0)\n", - " target:\n", - " start: trigger\n", - " end: start + 3d\n", - " start_inclusive: False\n", - " end_inclusive: True\n", - " label: discharge_or_death\n", - "\"\"\"\n", - "!mkdir /content/tasks/ -p\n", - "TASK_NAME = \"in_hospital_3d_los_after_48h\"\n", - "TASK_CONFIG_FP = f\"/content/tasks/{TASK_NAME}.yaml\"\n", - "with open(TASK_CONFIG_FP, 'w') as f:\n", - " f.write(task_config)" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/pyproject.toml b/pyproject.toml index 649eef6..152d01d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ classifiers = [ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ] -dependencies = ["meds==0.3.3", "es-aces==0.5.0"] +dependencies = ["meds>=0.3.3", "es-aces>=0.5.0"] [tool.setuptools_scm] diff --git a/src/MEDS_DEV/demo/meds_cehrbert.py b/src/MEDS_DEV/demo/meds_cehrbert.py deleted file mode 100644 index de1c37f..0000000 --- a/src/MEDS_DEV/demo/meds_cehrbert.py +++ /dev/null @@ -1,397 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.16.4 -# kernelspec: -# 
display_name: Python 3 -# name: python3 -# --- - -# %% [Colab-only] Switch Colab to python 3.12 -# !sudo apt-get install python3.12 python3.12-venv -# import sys -# !python3.12 -m venv meds_env -# import os -# os.environ['PATH'] = '/content/meds_env/bin:' + os.environ['PATH'] -# !pip install --upgrade pip - -# # Then in a new code cell: -# import sys -# sys.executable = '/content/meds_env/bin/python' - -# # Confirm python version is 3.12 -# !python --version - -# %% [markdown] -# ## Install dependencies - -# %% -!pip -q install meds_etl==0.3.6 meds_transforms==0.0.7 - -# %% [markdown] -# # Download MIMIC-IV demo - -# %% -# macOS users should install wget (e.g. through brew) -!wget -q -r -N -c --no-host-directories --cut-dirs=1 -np -P ./content/download https://physionet.org/files/mimic-iv-demo/2.2/ - -# %% -# Download pre-meds script, event config (defining how raw data is converted to meds data), and meds-transform config -!mkdir -p ./content/meds-transform/ -!git clone --depth 1 https://github.com/mmcdermott/MEDS_transforms.git ./content/tmp/ -!mv ./content/tmp/MIMIC-IV_Example ./content/MIMIC-IV_Example - -# %% -# Download MIMIC IV metadata -MIMICIV_RAW_DIR = "https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map" -MIMICIV_PRE_MEDS_DIR = "./content/pre_meds/" -!mkdir {MIMICIV_PRE_MEDS_DIR} - -OUTPUT_DIR = "./content/download/mimic-iv-demo/2.2" - -files = [ - 'd_labitems_to_loinc.csv', - 'inputevents_to_rxnorm.csv', - 'lab_itemid_to_loinc.csv', - 'meas_chartevents_main.csv', - 'meas_chartevents_value.csv', - 'numerics-summary.csv', - 'outputevents_to_loinc.csv', - 'proc_datetimeevents.csv', - 'proc_itemid.csv', - 'waveforms-summary.csv' -] - -for file in files: - !wget -O {OUTPUT_DIR}/{file} {MIMICIV_RAW_DIR}/{file} - !wget -O {MIMICIV_PRE_MEDS_DIR}/{file} {MIMICIV_RAW_DIR}/{file} - -# %% -# Convert to MEDS -CURRENT_DIR = !pwd -CURRENT_DIR = CURRENT_DIR[0] - -# %% -# Convert to MEDS -TUTORIAL_DIR = CURRENT_DIR + 
"/content/MIMIC-IV_Example" -MIMICIV_RAW_DIR = CURRENT_DIR + "/content/download/mimic-iv-demo/2.2" -MIMICIV_PRE_MEDS_DIR = CURRENT_DIR + "/content/pre_meds" -MIMICIV_MEDS_DIR = CURRENT_DIR + "/content/meds" - -EVENT_CONVERSION_CONFIG_FP = CURRENT_DIR + "/content/MIMIC-IV_Example/configs/event_config.yaml" -PIPELINE_CONFIG_PATH = CURRENT_DIR + "/content/MIMIC-IV_Example/configs/pipeline_config.yaml" -!cd {TUTORIAL_DIR} && ./run.sh {MIMICIV_RAW_DIR} {MIMICIV_PRE_MEDS_DIR} {MIMICIV_MEDS_DIR} do_unzip=true - -# %% [markdown] -# # Examine MEDS data - -# %% -import polars as pl - -data = pl.read_parquet('./content/meds/data/**/*.parquet') -data[['subject_id', 'time', 'code', 'numeric_value']] - -# %% [markdown] -# # A simple Polars analysis - -# %% -icd10_events = data.filter(pl.col('code').str.starts_with('DIAGNOSIS//ICD//10//')) -icd10_events.group_by('code').count().sort('count', descending=True) - -# %% -df = pl.read_parquet("./content/meds/metadata/codes.parquet") -df - -# %% [markdown] -# ## Using an example MEDS tool, ACES for labeling - -# %% [markdown] -# ## Install ACES - -# %% -!pip install es-aces - -# %% - -# From ACES documentation -task_config = """ -description: >- - This file specifies the base configuration for the prediction of a hospital los being greater than 3days, - leveraging only the first 48 hours of data after admission, with a 24 hour gap between the input window - and the target window. Patients who die or are discharged in the gap window are excluded. Note that this - task is in-**hospital** los, not in-**ICU** los which is a different task. 
- -predicates: - hospital_admission: - code: {regex: "HOSPITAL_ADMISSION//.*"} - hospital_discharge: - code: {regex: "HOSPITAL_DISCHARGE//.*"} - death: - code: MEDS_DEATH - discharge_or_death: - expr: or(hospital_discharge, death) - -trigger: hospital_admission - -windows: - input: - start: NULL - end: trigger + 48h - start_inclusive: True - end_inclusive: True - index_timestamp: end - gap: - start: input.end - end: start + 24h - start_inclusive: False - end_inclusive: True - has: - hospital_admission: (None, 0) - discharge_or_death: (None, 0) - target: - start: trigger - end: start + 3d - start_inclusive: False - end_inclusive: True - label: discharge_or_death -""" - -!mkdir ./content/tasks/ -p -TASK_NAME = "in_hospital_3d_los_after_48h" -TASK_CONFIG_FP = f"./content/tasks/{TASK_NAME}.yaml" -with open(TASK_CONFIG_FP, 'w') as f: - f.write(task_config) - - -# %% -!pip install es-aces - -# %% -!echo $TASK_NAME -!echo $TASK_CONFIG_FP - -# %% -!aces-cli --multirun data=sharded data.standard=meds data.root="$MIMICIV_MEDS_DIR/data" "data.shard=$(expand_shards ./content/meds/data/)" cohort_dir=" ./content/tasks" cohort_name="$TASK_NAME" config_path="$TASK_CONFIG_FP" - -# %% -# TODO: reimporting polars due to dependencies? 
-import polars as pl - -# Execute query and get results -df = pl.read_parquet(f"./content/tasks/{TASK_NAME}/**/*.parquet") - -print("train prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/train/*.parquet")['boolean_value'].mean(), 3))) -print("tuning prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/tuning/*.parquet")['boolean_value'].mean(), 3))) -print("held_out prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/held_out/*.parquet")['boolean_value'].mean(), 3))) - - -df.sort('boolean_value') - -# %% [markdown] -# ## Switch Colab to python 3.11 for cehrbert -# %% -# %%capture -# !sudo apt-get install python3.11 python3.11-venv -# import sys -# !python3.11 -m venv cehrbert -# import os -# os.environ['PATH'] = './content/cehrbert/bin:' + os.environ['PATH'] -# !pip install --upgrade pip - -# %% -# import sys -# sys.executable = './content/cehrbert/bin/python' - -# %% [markdown] -# ## Install cehrbert and its dependencies - -# %% -!pip install meds_reader==0.1.9 -!pip install setuptools -!pip install cehrbert==1.3.1 - -# %% -MIMICIV_MEDS_DIR = "./content/meds/" -MIMICIV_MEDS_READER_DIR = "./content/meds_reader/" -TASK_DIR="./content/tasks/" -TASK_NAME="in_hospital_3d_los_after_48h" -OUTPUT_PRETRAIN_MODEL_DIR="./content/output/cehrbert/" -# TODO this variable has an identical name? 
-OUTPUT_PRETRAIN_MODEL_DIR="./content/output/cehrbert_finetuned/" - -# %% [markdown] -# Run meds_reader on the MEDS data - -# %% -!meds_reader_convert $MIMICIV_MEDS_DIR $MIMICIV_MEDS_READER_DIR - -# %% -!mkdir -p ./content/output/cehrbert/ -!mkdir -p ./content/output/cehrbert_dataset_prepared/ -!mkdir -p ./content/output/cehrbert_finetuned/ - -# %% -# !mkdir ./content/github_repo;cd ./content/github_repo;git clone https://github.com/cumc-dbmi/cehrbert.git;cd cehrbert;git checkout fix/meds_evaluation;pip install .; - -# %% [markdown] -# Create the cehrbert pretraining configuration yaml file - -# %% -cehrbert_pretrain_config = """ -#Model arguments -model_name_or_path: "./content/output/cehrbert/" -tokenizer_name_or_path: "./content/output/cehrbert/" -num_hidden_layers: 6 -max_position_embeddings: 1024 -hidden_size: 768 -vocab_size: 100000 -min_frequency: 50 -include_value_prediction: false # additional CEHR-BERT learning objective - -#Data arguments -data_folder: "./content/meds_reader/" -dataset_prepared_path: "./content/output/cehrbert_dataset_prepared/" - -# Below is a list of Med-to-CehrBert related arguments -preprocessing_num_workers: 2 -preprocessing_batch_size: 128 -# if is_data_in_med is false, it assumes the data is in the cehrbert format -is_data_in_meds: true -att_function_type: "cehr_bert" -inpatient_att_function_type: "mix" -include_auxiliary_token: true -include_demographic_prompt: false -# if the data is in the meds format, the validation split will be omitted -# as the meds already provide train/tuning/held_out splits -validation_split_percentage: 0.05 - -# Huggingface Arguments -dataloader_num_workers: 2 -dataloader_prefetch_factor: 2 - -overwrite_output_dir: false -resume_from_checkpoint: # automatically infer the latest checkpoint from the output folder -seed: 42 - -output_dir: "./content/output/cehrbert/" -evaluation_strategy: "epoch" -save_strategy: "epoch" -eval_accumulation_steps: 10 - -learning_rate: 0.00005 -per_device_train_batch_size: 8 
-per_device_eval_batch_size: 8 -gradient_accumulation_steps: 2 - -num_train_epochs: 50 # for large datasets, 5-10 epochs should suffice -warmup_steps: 10 -weight_decay: 0.01 -logging_dir: "./logs" -logging_steps: 10 - -save_total_limit: -load_best_model_at_end: true -metric_for_best_model: "eval_loss" -greater_is_better: false - -report_to: "none" -""" -PRETRAIN_CONFIG_FP = f"./content/output/cehrbert/cehrbert_pretrain_config.yaml" -with open(PRETRAIN_CONFIG_FP, 'w') as f: - f.write(cehrbert_pretrain_config) - -# %% [markdown] -# ## Pretrain cehrbert using MLM -!python3.11 -m cehrbert.runners.hf_cehrbert_pretrain_runner ./content/output/cehrbert/cehrbert_pretrain_config.yaml - -# %% [markdown] -# ## Create the cehrbert finetuning configuration yaml file -cehrbert_finetune_config = f""" -#Model arguments -model_name_or_path: "./content/output/cehrbert/" -tokenizer_name_or_path: "./content/output/cehrbert/" -num_hidden_layers: 6 -max_position_embeddings: 1024 -hidden_size: 768 -vocab_size: 100000 -min_frequency: 50 -include_value_prediction: false # additional CEHR-BERT learning objective - -#Data arguments -cohort_folder: "./content/tasks/{TASK_NAME}/" -data_folder: "./content/meds_reader/" -dataset_prepared_path: "./content/output/cehrbert_dataset_prepared/" - -#LORA -use_lora: True -lora_rank: 64 -lora_alpha: 16 -target_modules: [ "query", "value" ] -lora_dropout: 0.1 - -# Below is a list of Med-to-CehrBert related arguments -preprocessing_num_workers: 2 -preprocessing_batch_size: 128 -# if is_data_in_med is false, it assumes the data is in the cehrbert format -is_data_in_meds: true -att_function_type: "cehr_bert" -inpatient_att_function_type: "mix" -include_auxiliary_token: true -include_demographic_prompt: false -# if the data is in the meds format, the validation split will be omitted -# as the meds already provide train/tuning/held_out splits -validation_split_percentage: 0.05 - -# Huggingface Arguments -dataloader_num_workers: 2 -dataloader_prefetch_factor: 2 
- -overwrite_output_dir: false -resume_from_checkpoint: # automatically infer the latest checkpoint from the output folder -seed: 42 - -output_dir: "./content/output/cehrbert_finetuned" -evaluation_strategy: "epoch" -save_strategy: "epoch" -eval_accumulation_steps: 10 - -do_train: True -do_predict: True - -learning_rate: 0.00005 -per_device_train_batch_size: 8 -per_device_eval_batch_size: 8 -gradient_accumulation_steps: 2 - -num_train_epochs: 10 -warmup_steps: 10 -weight_decay: 0.01 -logging_dir: "./logs" -logging_steps: 10 - -save_total_limit: -load_best_model_at_end: true -metric_for_best_model: "eval_loss" -greater_is_better: false - -report_to: "none" -""" -FINETUNE_CONFIG_FP = f"./content/output/cehrbert/cehrbert_finetune_config.yaml" -with open(FINETUNE_CONFIG_FP, 'w') as f: - f.write(cehrbert_finetune_config) - -# %% -# ## Finetune cehrbert for the downstream task -!python3.11 -m cehrbert.runners.hf_cehrbert_finetune_runner ./content/output/cehrbert/cehrbert_finetune_config.yaml - -# %% -import pandas as pd - -pd.read_parquet("./content/output/cehrbert_finetuned/test_predictions") - -# %% -!cat ./content/output/cehrbert_finetuned/test_results.json diff --git a/src/MEDS_DEV/demo/meds_tab.ipynb b/src/MEDS_DEV/demo/meds_tab.ipynb deleted file mode 100644 index 86e5791..0000000 --- a/src/MEDS_DEV/demo/meds_tab.ipynb +++ /dev/null @@ -1,485 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xLJYigczPaTG" - }, - "outputs": [], - "source": [ - "#@title Swtich Colab to python 3.12\n", - "%%capture\n", - "!sudo apt-get install python3.12 python3.12-venv\n", - "import sys\n", - "!python3.12 -m venv meds_env\n", - "import os\n", - "os.environ['PATH'] = '/content/meds_env/bin:' + os.environ['PATH']\n", - "!pip install --upgrade pip\n", - "\n", - "# Then in a new code cell:\n", - "import sys\n", - "sys.executable = '/content/meds_env/bin/python'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "w8Zrf0NePwfs", - "outputId": "6aa313bc-5141-453c-88f5-8c1d22956f3d" - }, - "outputs": [], - "source": [ - "# confirm python version is 3.12\n", - "!python --version" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ikPVQZOnPcI0" - }, - "outputs": [], - "source": [ - "#@title Install dependencies\n", - "!pip -q install meds_etl==0.3.6 meds_transforms==0.0.7" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true, - "id": "rjqK4CuRPfnE" - }, - "outputs": [], - "source": [ - "#@title Download MIMIC-IV demo\n", - "\n", - "!wget -q -r -N -c --no-host-directories --cut-dirs=1 -np -P download https://physionet.org/files/mimic-iv-demo/2.2/" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "qHOBI1_5StBb", - "outputId": "eb0ef7ec-54c8-4cac-b1ff-d176c986a447" - }, - "outputs": [], - "source": [ - "# Download pre-meds script, event config (defining how raw data is converted to meds data), and meds-transform config\n", - "!mkdir /content/meds-transform/\n", - "!git clone --depth 1 https://github.com/mmcdermott/MEDS_transforms.git /content/tmp/\n", - "!mv /content/tmp/MIMIC-IV_Example /content/MIMIC-IV_Example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Sr2QdvNxpd0p", - "outputId": "7877300f-afc5-4583-95f2-e4f7089356b6" - }, - "outputs": [], - "source": [ - "# download MIMIC IV metadata\n", - "MIMICIV_RAW_DIR = \"https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map\"\n", - "MIMICIV_PRE_MEDS_DIR = \"/content/pre_meds/\"\n", - "!mkdir {MIMICIV_PRE_MEDS_DIR}\n", - "\n", - "OUTPUT_DIR = \"/content/download/mimic-iv-demo/2.2\"\n", - "\n", - "files = [\n", - " 'd_labitems_to_loinc.csv',\n", - 
" 'inputevents_to_rxnorm.csv',\n", - " 'lab_itemid_to_loinc.csv',\n", - " 'meas_chartevents_main.csv',\n", - " 'meas_chartevents_value.csv',\n", - " 'numerics-summary.csv',\n", - " 'outputevents_to_loinc.csv',\n", - " 'proc_datetimeevents.csv',\n", - " 'proc_itemid.csv',\n", - " 'waveforms-summary.csv'\n", - "]\n", - "\n", - "for file in files:\n", - " !wget -O {OUTPUT_DIR}/{file} {MIMICIV_RAW_DIR}/{file}\n", - " !wget -O {MIMICIV_PRE_MEDS_DIR}/{file} {MIMICIV_RAW_DIR}/{file}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "pQSLxYJhRPxm", - "outputId": "41ab56f5-512c-4489-adfc-614644c6c632" - }, - "outputs": [], - "source": [ - "# Convert to MEDS\n", - "TUTORIAL_DIR = \"/content/MIMIC-IV_Example\"\n", - "MIMICIV_RAW_DIR = \"/content/download/mimic-iv-demo/2.2\"\n", - "MIMICIV_PRE_MEDS_DIR = \"/content/pre_meds/\"\n", - "MIMICIV_MEDS_DIR = \"/content/meds/\"\n", - "\n", - "EVENT_CONVERSION_CONFIG_FP=\"/content/MIMIC-IV_Example/configs/event_config.yaml\"\n", - "PIPELINE_CONFIG_PATH=\"/content/MIMIC-IV_Example/configs/pipeline_config.yaml\"\n", - "!cd {TUTORIAL_DIR} && /content/MIMIC-IV_Example/run.sh {MIMICIV_RAW_DIR} {MIMICIV_PRE_MEDS_DIR} {MIMICIV_MEDS_DIR} do_unzip=true" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 443 - }, - "id": "u2f6socuWhjd", - "outputId": "289bc4ae-e374-4ed1-fd98-58c803f14e26" - }, - "outputs": [], - "source": [ - "#@title Examine MEDS data\n", - "\n", - "import polars as pl\n", - "data = pl.read_parquet('/content/meds/data/**/*.parquet')\n", - "\n", - "data[['subject_id', 'time', 'code', 'numeric_value']]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 479 - }, - "id": "CZD9xpSxd1Wp", - "outputId": "ea758e42-b71d-464f-f931-df7eec7a4415" - }, - 
"outputs": [], - "source": [ - "#@title A Simple Polars Analysis\n", - "\n", - "icd10_events = data.filter(pl.col('code').str.starts_with('DIAGNOSIS//ICD//10//'))\n", - "\n", - "icd10_events.group_by('code').count().sort('count', descending=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 443 - }, - "id": "u7EXKCZelho-", - "outputId": "27e81b86-1195-4c6c-f7c7-993665b826d7" - }, - "outputs": [], - "source": [ - "df = pl.read_parquet(\"/content/meds/metadata/codes.parquet\")\n", - "df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PZmYRVX2W8m7" - }, - "source": [ - "# Using an example MEDS tool, ACES for labeling" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "collapsed": true, - "id": "z3_pG9YAWpKy", - "outputId": "efa4c286-413d-4a91-a53d-fb41769cd4f2" - }, - "outputs": [], - "source": [ - "#@title Install ACES\n", - "\n", - "\n", - "!pip install es-aces" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "H6fqe217XDhi" - }, - "outputs": [], - "source": [ - "# From the ACES documentation\n", - "\n", - "task_config = \"\"\"\n", - "description: >-\n", - " This file specifies the base configuration for the prediction of a hospital los being greater than 3days,\n", - " leveraging only the first 48 hours of data after admission, with a 24 hour gap between the input window\n", - " and the target window. Patients who die or are discharged in the gap window are excluded. 
Note that this\n", - " task is in-**hospital** los, not in-**ICU** los which is a different task.\n", - "\n", - "predicates:\n", - " hospital_admission:\n", - " code: {regex: \"HOSPITAL_ADMISSION//.*\"}\n", - " hospital_discharge:\n", - " code: {regex: \"HOSPITAL_DISCHARGE//.*\"}\n", - " death:\n", - " code: MEDS_DEATH\n", - " discharge_or_death:\n", - " expr: or(hospital_discharge, death)\n", - "\n", - "trigger: hospital_admission\n", - "\n", - "windows:\n", - " input:\n", - " start: NULL\n", - " end: trigger + 48h\n", - " start_inclusive: True\n", - " end_inclusive: True\n", - " index_timestamp: end\n", - " gap:\n", - " start: input.end\n", - " end: start + 24h\n", - " start_inclusive: False\n", - " end_inclusive: True\n", - " has:\n", - " hospital_admission: (None, 0)\n", - " discharge_or_death: (None, 0)\n", - " target:\n", - " start: trigger\n", - " end: start + 3d\n", - " start_inclusive: False\n", - " end_inclusive: True\n", - " label: discharge_or_death\n", - "\"\"\"\n", - "!mkdir /content/tasks/ -p\n", - "TASK_NAME = \"in_hospital_3d_los_after_48h\"\n", - "TASK_CONFIG_FP = f\"/content/tasks/{TASK_NAME}.yaml\"\n", - "with open(TASK_CONFIG_FP, 'w') as f:\n", - " f.write(task_config)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "es-39eHOyp5a", - "outputId": "9d5e2468-fdd5-4c4b-8615-fe24f5a9310f" - }, - "outputs": [], - "source": [ - "!pip install es-aces" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "bXLiJGEry-Gb", - "outputId": "7d954ab4-cf5c-4d02-a99c-669b5822bf44" - }, - "outputs": [], - "source": [ - "!aces-cli --multirun data=sharded data.standard=meds data.root=\"$MIMICIV_MEDS_DIR/data\" \"data.shard=$(expand_shards /content/meds/data/)\" cohort_dir=\" /content/tasks\" cohort_name=\"$TASK_NAME\" config_path=\"$TASK_CONFIG_FP\"" - ] - }, - { - "cell_type": 
"code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 497 - }, - "id": "7Vvac7DIWyRT", - "outputId": "40493f0e-48ba-4f5e-9d9a-401e26f1a9b7" - }, - "outputs": [], - "source": [ - "import polars as pl\n", - "\n", - "# execute query and get results\n", - "df = pl.read_parquet(f\"/content/tasks/{TASK_NAME}/**/*.parquet\")\n", - "\n", - "print(\"train prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/train/*.parquet\")['boolean_value'].mean(), 3)))\n", - "print(\"tuning prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/tuning/*.parquet\")['boolean_value'].mean(), 3)))\n", - "print(\"held_out prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/held_out/*.parquet\")['boolean_value'].mean(), 3)))\n", - "\n", - "\n", - "df.sort('boolean_value')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "XWB7O1UGhRIo", - "outputId": "e3416d5e-7427-4cf4-c0ab-20053a9d3430" - }, - "outputs": [], - "source": [ - "#@title Install meds-tab\n", - "\n", - "!pip uninstall es-aces -y\n", - "!pip install meds-tab" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "SeGawIqli0nn" - }, - "outputs": [], - "source": [ - "MIMICIV_MEDS_DIR = \"/content/meds/\"\n", - "OUTPUT_TABULARIZATION_DIR=\"/content/tabularized/\"\n", - "TASK_DIR=\"/content/tasks/\"\n", - "TASK_NAME=\"in_hospital_3d_los_after_48h\"\n", - "OUTPUT_MODEL_DIR=\"/content/output/meds_tab/\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Tud0_0cgjljP", - "outputId": "fb3417e0-3ba4-4f9a-ab95-ce3ba8731ca1" - }, - "outputs": [], - "source": [ - "!meds-tab-describe input_dir={MIMICIV_MEDS_DIR}/data output_dir={OUTPUT_TABULARIZATION_DIR}" - ] - }, - { - "cell_type": 
"code", - "execution_count": null, - "metadata": { - "id": "RVLBdOn1mnV5" - }, - "outputs": [], - "source": [ - "# Define the window sizes and aggregations to generate features for\n", - "WINDOW_SIZES = \"tabularization.window_sizes=[1d,30d,365d]\"\n", - "AGGREGATIONS = \"tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KhCPqBmduNYK" - }, - "outputs": [], - "source": [ - "!rm -rf /content/tabularized/tabularize/" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "p_D07KzxjVUl", - "outputId": "8836b076-cf64-4f29-da81-ac5125ab7608" - }, - "outputs": [], - "source": [ - "!meds-tab-tabularize-static \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "u-e-mV2Hk-Qf", - "outputId": "c292be12-ff74-44e4-f039-758e10ccc909" - }, - "outputs": [], - "source": [ - "!meds-tab-tabularize-time-series --multirun \"worker=range(0,2)\" \"hydra/launcher=joblib\" \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NmaR_-Fik4eH" - }, - "outputs": [], - "source": [ - "!meds-tab-cache-task \"input_dir={MIMICIV_MEDS_DIR}/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" \"input_label_dir=$TASK_DIR/$TASK_NAME/\" \"task_name=$TASK_NAME\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dLIkOzTblBB2" - }, - "outputs": [], - "source": [ - "!meds-tab-xgboost --multirun 
\"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" \"output_model_dir=$OUTPUT_MODEL_DIR/$TASK_NAME/\" \"task_name=$TASK_NAME\" do_overwrite=False \"hydra.sweeper.n_trials=10\" $WINDOW_SIZES $AGGREGATIONS \"tabularization.min_code_inclusion_count=10\"" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/src/MEDS_DEV/demo/meds_tab.py b/src/MEDS_DEV/demo/meds_tab.py deleted file mode 100644 index 4fae3ae..0000000 --- a/src/MEDS_DEV/demo/meds_tab.py +++ /dev/null @@ -1,240 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.16.4 -# kernelspec: -# display_name: Python 3 -# name: python3 -# --- - -# %% [Colab-only] Switch Colab to python 3.12 -# !sudo apt-get install python3.12 python3.12-venv -# import sys -# !python3.12 -m venv meds_env -# import os -# os.environ['PATH'] = '/content/meds_env/bin:' + os.environ['PATH'] -# !pip install --upgrade pip - -# # Then in a new code cell: -# import sys -# sys.executable = '/content/meds_env/bin/python' - -# # Confirm python version is 3.12 -# !python --version - -# %% -!pwd # Should be .../src/MEDS_DEV/demo - -# %% [markdown] -# ## Install dependencies - -# %% -!pip -q install meds_etl==0.3.6 meds_transforms==0.0.7 - -# TODO install meds-evaluation - -# %% [markdown] -# # Download MIMIC-IV demo - -# %% -# macOS users should install wget (e.g. 
through brew) -!wget -q -r -N -c --no-host-directories --cut-dirs=1 -np -P ./content/download https://physionet.org/files/mimic-iv-demo/2.2/ - -# %% -# Download pre-meds script, event config (defining how raw data is converted to meds data), and meds-transform config -!mkdir -p ./content/meds-transform/ -!git clone --depth 1 https://github.com/mmcdermott/MEDS_transforms.git ./content/tmp/ -!mv ./content/tmp/MIMIC-IV_Example ./content/MIMIC-IV_Example - -# %% -# Download MIMIC-IV metadata -MIMICIV_RAW_DIR = "https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map" -MIMICIV_PRE_MEDS_DIR = "./content/pre_meds/" -!mkdir {MIMICIV_PRE_MEDS_DIR} - -OUTPUT_DIR = "./content/download/mimic-iv-demo/2.2" - -files = [ - 'd_labitems_to_loinc.csv', - 'inputevents_to_rxnorm.csv', - 'lab_itemid_to_loinc.csv', - 'meas_chartevents_main.csv', - 'meas_chartevents_value.csv', - 'numerics-summary.csv', - 'outputevents_to_loinc.csv', - 'proc_datetimeevents.csv', - 'proc_itemid.csv', - 'waveforms-summary.csv' -] - -for file in files: - !wget -O {OUTPUT_DIR}/{file} {MIMICIV_RAW_DIR}/{file} - !wget -O {MIMICIV_PRE_MEDS_DIR}/{file} {MIMICIV_RAW_DIR}/{file} - -# %% -# Convert to MEDS -CURRENT_DIR = !pwd -CURRENT_DIR = CURRENT_DIR[0] -# %% -TUTORIAL_DIR = CURRENT_DIR + "/content/MIMIC-IV_Example" -MIMICIV_RAW_DIR = CURRENT_DIR + "/content/download/mimic-iv-demo/2.2" -MIMICIV_PRE_MEDS_DIR = CURRENT_DIR + "/content/pre_meds" -MIMICIV_MEDS_DIR = CURRENT_DIR + "/content/meds" - -EVENT_CONVERSION_CONFIG_FP = CURRENT_DIR + "/content/MIMIC-IV_Example/configs/event_config.yaml" -PIPELINE_CONFIG_PATH = CURRENT_DIR + "/content/MIMIC-IV_Example/configs/pipeline_config.yaml" -!cd {TUTORIAL_DIR} && ./run.sh {MIMICIV_RAW_DIR} {MIMICIV_PRE_MEDS_DIR} {MIMICIV_MEDS_DIR} do_unzip=true - -# %% [markdown] -# # Examine MEDS data - -# %% -import polars as pl - -data = pl.read_parquet('./content/meds/data/**/*.parquet') - -data[['subject_id', 'time', 'code', 'numeric_value']] - -# %% 
[markdown] -# # A simple Polars analysis - -# %% -icd10_events = data.filter(pl.col('code').str.starts_with('DIAGNOSIS//ICD//10//')) -icd10_events.group_by('code').count().sort('count', descending=True) - -# %% -df = pl.read_parquet("./content/meds/metadata/codes.parquet") -df - -# %% [markdown] -# ## Using an example MEDS tool, ACES for labeling - -# %% [markdown] -# ## Install ACES - -# %% -!pip install es-aces - -# %% - -# From ACES documentation -task_config = """ -description: >- - This file specifies the base configuration for the prediction of a hospital los being greater than 3days, - leveraging only the first 48 hours of data after admission, with a 24 hour gap between the input window - and the target window. Patients who die or are discharged in the gap window are excluded. Note that this - task is in-**hospital** los, not in-**ICU** los which is a different task. - -predicates: - hospital_admission: - code: {regex: "HOSPITAL_ADMISSION//.*"} - hospital_discharge: - code: {regex: "HOSPITAL_DISCHARGE//.*"} - death: - code: MEDS_DEATH - discharge_or_death: - expr: or(hospital_discharge, death) - -trigger: hospital_admission - -windows: - input: - start: NULL - end: trigger + 48h - start_inclusive: True - end_inclusive: True - index_timestamp: end - gap: - start: input.end - end: start + 24h - start_inclusive: False - end_inclusive: True - has: - hospital_admission: (None, 0) - discharge_or_death: (None, 0) - target: - start: trigger - end: start + 3d - start_inclusive: False - end_inclusive: True - label: discharge_or_death -""" - -!mkdir ./content/tasks/ -p -TASK_NAME = "in_hospital_3d_los_after_48h" -TASK_CONFIG_FP = f"./content/tasks/{TASK_NAME}.yaml" -with open(TASK_CONFIG_FP, 'w') as f: - f.write(task_config) - -# %% -!pip install es-aces - -# %% -!aces-cli --multirun data=sharded data.standard=meds data.root="$MIMICIV_MEDS_DIR/data" "data.shard=$(expand_shards ./content/meds/data/)" cohort_dir=" ./content/tasks" cohort_name="$TASK_NAME" 
config_path="$TASK_CONFIG_FP" - -# %% -# TODO: reimporting polars due to dependencies? -import polars as pl - -# Execute query and get results -df = pl.read_parquet(f"./content/tasks/{TASK_NAME}/**/*.parquet") - -print("train prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/train/*.parquet")['boolean_value'].mean(), 3))) -print("tuning prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/tuning/*.parquet")['boolean_value'].mean(), 3))) -print("held_out prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/held_out/*.parquet")['boolean_value'].mean(), 3))) - - -df.sort('boolean_value') - -# %% -# ## Install meds-tab - -!pip uninstall es-aces -y # TODO ??? -!pip install meds-tab - -# %% -MIMICIV_MEDS_DIR = "./content/meds/" -OUTPUT_TABULARIZATION_DIR="./content/tabularized/" -TASK_DIR="./content/tasks/" -TASK_NAME="in_hospital_3d_los_after_48h" -OUTPUT_MODEL_DIR="./content/output/meds_tab/" - -# %% -!meds-tab-describe input_dir={MIMICIV_MEDS_DIR}/data output_dir={OUTPUT_TABULARIZATION_DIR} - -# %% -# Define the window sizes and aggregations to generate features for -# TODO define this as system variables or make sure the shell -# commands can find these -WINDOW_SIZES = "tabularization.window_sizes=[1d,30d,365d]" -AGGREGATIONS = "tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]" - -# %% -!rm -rf ./content/tabularized/tabularize/ - -# %% -# TODO shell vs python variables -!echo {OUTPUT_TABULARIZATION_DIR} - -# %% -# TODO shell vs python variables -!echo WINDOW_SIZES -# %% -# TODO shell vs python variables -!meds-tab-tabularize-static "input_dir=$MIMICIV_MEDS_DIR/data" "output_dir=$OUTPUT_TABULARIZATION_DIR" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS - -# %% -# TODO shell vs python variables -!meds-tab-tabularize-time-series --multirun "worker=range(0,2)" "hydra/launcher=joblib" "input_dir=$MIMICIV_MEDS_DIR/data" "output_dir=$OUTPUT_TABULARIZATION_DIR" 
do_overwrite=False $WINDOW_SIZES $AGGREGATIONS - -# %% -# TODO shell vs python variables -!meds-tab-cache-task "input_dir={MIMICIV_MEDS_DIR}/data" "output_dir=$OUTPUT_TABULARIZATION_DIR" "input_label_dir=$TASK_DIR/$TASK_NAME/" "task_name=$TASK_NAME" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS - -# %% -# TODO shell vs python variables -!meds-tab-xgboost --multirun "input_dir=$MIMICIV_MEDS_DIR/data" "output_dir=$OUTPUT_TABULARIZATION_DIR" "output_model_dir=$OUTPUT_MODEL_DIR/$TASK_NAME/" "task_name=$TASK_NAME" do_overwrite=False "hydra.sweeper.n_trials=10" $WINDOW_SIZES $AGGREGATIONS "tabularization.min_code_inclusion_count=10" From 5a98d43e114c32983dadf3cd7394468dd3a1075d Mon Sep 17 00:00:00 2001 From: Nassim Oufattole Date: Sat, 14 Dec 2024 17:34:27 -0800 Subject: [PATCH 3/8] Ran e2e the extraction and aces demos, works as expected --- demo/aces.ipynb | 520 +-- demo/extract_meds_data.ipynb | 6516 +++++++++++++++++++++++++--------- 2 files changed, 5097 insertions(+), 1939 deletions(-) diff --git a/demo/aces.ipynb b/demo/aces.ipynb index 7b31586..db13aab 100644 --- a/demo/aces.ipynb +++ b/demo/aces.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -20,7 +20,36 @@ "id": "z3_pG9YAWpKy", "outputId": "efa4c286-413d-4a91-a53d-fb41769cd4f2" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: es-aces in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (0.6.1)\n", + "Requirement already satisfied: polars<=1.17.1,>=1.0.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (1.17.1)\n", + "Requirement already satisfied: bigtree==0.18.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (0.18.3)\n", + "Requirement already satisfied: ruamel.yaml==0.18.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from 
es-aces) (0.18.6)\n", + "Requirement already satisfied: loguru==0.7.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (0.7.3)\n", + "Requirement already satisfied: hydra-core==1.3.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (1.3.2)\n", + "Requirement already satisfied: pytimeparse==1.1.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (1.1.8)\n", + "Requirement already satisfied: networkx==3.3.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (3.3)\n", + "Requirement already satisfied: pyarrow==17.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (17.0.0)\n", + "Requirement already satisfied: meds==0.3.3 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (0.3.3)\n", + "Requirement already satisfied: omegaconf<2.4,>=2.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-core==1.3.*->es-aces) (2.3.0)\n", + "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-core==1.3.*->es-aces) (4.9.3)\n", + "Requirement already satisfied: packaging in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-core==1.3.*->es-aces) (24.2)\n", + "Requirement already satisfied: jsonschema>=4.0.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds==0.3.3->es-aces) (4.23.0)\n", + "Requirement already satisfied: typing-extensions>=4.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds==0.3.3->es-aces) (4.12.2)\n", + "Requirement already satisfied: numpy>=1.16.6 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from pyarrow==17.*->es-aces) (2.2.0)\n", + "Requirement already satisfied: ruamel.yaml.clib>=0.2.7 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from ruamel.yaml==0.18.*->es-aces) (0.2.12)\n", + 
"Requirement already satisfied: attrs>=22.2.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->es-aces) (24.2.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->es-aces) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->es-aces) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->es-aces) (0.22.3)\n", + "Requirement already satisfied: PyYAML>=5.1.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from omegaconf<2.4,>=2.2->hydra-core==1.3.*->es-aces) (6.0.2)\n" + ] + } + ], "source": [ "#@title Install ACES\n", "\n", @@ -30,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -58,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": null, "metadata": { "id": "H6fqe217XDhi" }, @@ -67,8 +96,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "TASK_DIR\n", - "mkdir: -p: File exists\n" + "TASK_DIR\n" ] } ], @@ -118,17 +146,16 @@ "\"\"\"\n", "MEDS_DIR = ROOT_DIR + \"/meds\"\n", "TASK_DIR = MEDS_DIR + \"/task_labels\"\n", - "! 
echo TASK_DIR\n", "TASK_NAME = \"los_in_hospital_first_48h\"\n", "TASK_CONFIG_FP = f\"{TASK_DIR}/{TASK_NAME}.yaml\"\n", - "!mkdir {TASK_DIR}/{TASK_NAME} -p\n", + "!mkdir -p {TASK_DIR}/{TASK_NAME}\n", "with open(TASK_CONFIG_FP, 'w') as f:\n", " f.write(task_config)" ] }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -141,13 +168,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "[2024-12-14 17:02:13,334][HYDRA] Launching 3 jobs locally\n", - "[2024-12-14 17:02:13,334][HYDRA] \t#0 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=held_out/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", - "\u001b[32m2024-12-14 17:02:13.542\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.547\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "[2024-12-14 17:33:45,826][HYDRA] Launching 3 jobs locally\n", + "[2024-12-14 17:33:45,826][HYDRA] \t#0 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=held_out/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", + "\u001b[32m2024-12-14 17:33:46.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", "root: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n", @@ -155,37 +182,37 @@ "path: ${data.root}/${data.shard}.parquet\n", "_prefix: 
/${data.shard}\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.575\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. 
Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 17:33:46.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.077\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┣━━ input.start\n", "┃ ┗━━ gap.end\n", "┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.578\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 4,155 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.630\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.630\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 4 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 
17:02:13.634\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 16 valid rows returned corresponding to 10 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.645\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. 
For now, we are dropping them.\n", + "\u001b[32m2024-12-14 17:33:46.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 12,159 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 
0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 8 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 39 valid rows returned corresponding to 23 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.176\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -193,13 +220,13 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.233536. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/held_out/0.parquet'.\u001b[0m\n", - "[2024-12-14 17:02:13,777][HYDRA] \t#1 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=train/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", - "\u001b[32m2024-12-14 17:02:13.842\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.845\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "\u001b[32m2024-12-14 17:33:46.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.279571. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/held_out/0.parquet'.\u001b[0m\n", + "[2024-12-14 17:33:46,323][HYDRA] \t#1 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=train/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", + "\u001b[32m2024-12-14 17:33:46.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", "root: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n", @@ -207,37 +234,37 @@ "path: ${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - 
"\u001b[32m2024-12-14 17:02:13.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. 
Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 17:33:46.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.422\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.438\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.450\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┣━━ input.start\n", "┃ ┗━━ gap.end\n", "┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 72,649 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:13.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.059\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 26 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - 
"\u001b[32m2024-12-14 17:02:14.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 212 valid rows returned corresponding to 75 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.240\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. 
For now, we are dropping them.\n", + "\u001b[32m2024-12-14 17:33:46.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 47,886 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.528\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.556\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 
0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.556\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 18 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 145 valid rows returned corresponding to 46 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.597\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -245,13 +272,13 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.399837. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/train/0.parquet'.\u001b[0m\n", - "[2024-12-14 17:02:14,243][HYDRA] \t#2 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=tuning/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", - "\u001b[32m2024-12-14 17:02:14.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "\u001b[32m2024-12-14 17:33:46.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.208261. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/train/0.parquet'.\u001b[0m\n", + "[2024-12-14 17:33:46,600][HYDRA] \t#2 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=tuning/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", + "\u001b[32m2024-12-14 17:33:46.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", "root: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n", @@ -259,37 +286,37 @@ "path: ${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - 
"\u001b[32m2024-12-14 17:02:14.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. 
Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 17:33:46.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.680\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.686\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.687\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┣━━ input.start\n", "┃ ┗━━ gap.end\n", "┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 6,240 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.331\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 2 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 
17:02:14.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 15 valid rows returned corresponding to 9 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.361\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. 
For now, we are dropping them.\n", + "\u001b[32m2024-12-14 17:33:46.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 22,999 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.760\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 
0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.760\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 6 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 59 valid rows returned corresponding to 25 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:46.793\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -297,7 +324,7 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:14.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.054431. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/tuning/0.parquet'.\u001b[0m\n" + "\u001b[32m2024-12-14 17:33:46.796\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.125503. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/tuning/0.parquet'.\u001b[0m\n" ] } ], @@ -307,7 +334,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -321,9 +348,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "train prevalence: 0.231\n", - "tuning prevalence: 0.133\n", - "held_out prevalence: 0.25\n" + "train prevalence: 0.241\n", + "tuning prevalence: 0.169\n", + "held_out prevalence: 0.256\n" ] }, { @@ -336,7 +363,7 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (243, 6)
subject_idprediction_timeboolean_valueinteger_valuefloat_valuecategorical_value
i64datetime[μs]booli64f64str
100128532175-04-07 15:36:00falsenullnullnull
100128532176-11-27 21:28:00falsenullnullnull
100147292125-03-01 07:15:00falsenullnullnull
100147292125-03-21 16:58:00falsenullnullnull
100167422178-07-05 21:13:00falsenullnullnull
100399972135-11-09 02:42:00truenullnullnull
100400252143-03-20 12:34:00truenullnullnull
100400252145-07-05 23:46:00truenullnullnull
100207402150-09-17 14:09:00truenullnullnull
100207402151-01-17 15:25:00truenullnullnull
" + "shape: (243, 6)
subject_idprediction_timeboolean_valueinteger_valuefloat_valuecategorical_value
i64datetime[μs]booli64f64str
100044572141-12-19 11:00:00falsenullnullnull
100090492174-05-28 08:21:00falsenullnullnull
100128532175-04-07 15:36:00falsenullnullnull
100128532176-11-27 21:28:00falsenullnullnull
100147292125-03-01 07:15:00falsenullnullnull
100264062129-01-05 15:55:00truenullnullnull
100356312113-07-19 17:15:00truenullnullnull
100397082140-09-27 04:17:00truenullnullnull
100397082142-05-17 17:14:00truenullnullnull
100397082142-07-08 09:08:00truenullnullnull
" ], "text/plain": [ "shape: (243, 6)\n", @@ -345,31 +372,31 @@ "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ i64 ┆ datetime[μs] ┆ bool ┆ i64 ┆ f64 ┆ str │\n", "╞════════════╪═══════════════════╪═══════════════╪═══════════════╪═════════════╪═══════════════════╡\n", + "│ 10004457 ┆ 2141-12-19 ┆ false ┆ null ┆ null ┆ null │\n", + "│ ┆ 11:00:00 ┆ ┆ ┆ ┆ │\n", + "│ 10009049 ┆ 2174-05-28 ┆ false ┆ null ┆ null ┆ null │\n", + "│ ┆ 08:21:00 ┆ ┆ ┆ ┆ │\n", "│ 10012853 ┆ 2175-04-07 ┆ false ┆ null ┆ null ┆ null │\n", "│ ┆ 15:36:00 ┆ ┆ ┆ ┆ │\n", "│ 10012853 ┆ 2176-11-27 ┆ false ┆ null ┆ null ┆ null │\n", "│ ┆ 21:28:00 ┆ ┆ ┆ ┆ │\n", "│ 10014729 ┆ 2125-03-01 ┆ false ┆ null ┆ null ┆ null │\n", "│ ┆ 07:15:00 ┆ ┆ ┆ ┆ │\n", - "│ 10014729 ┆ 2125-03-21 ┆ false ┆ null ┆ null ┆ null │\n", - "│ ┆ 16:58:00 ┆ ┆ ┆ ┆ │\n", - "│ 10016742 ┆ 2178-07-05 ┆ false ┆ null ┆ null ┆ null │\n", - "│ ┆ 21:13:00 ┆ ┆ ┆ ┆ │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ 10039997 ┆ 2135-11-09 ┆ true ┆ null ┆ null ┆ null │\n", - "│ ┆ 02:42:00 ┆ ┆ ┆ ┆ │\n", - "│ 10040025 ┆ 2143-03-20 ┆ true ┆ null ┆ null ┆ null │\n", - "│ ┆ 12:34:00 ┆ ┆ ┆ ┆ │\n", - "│ 10040025 ┆ 2145-07-05 ┆ true ┆ null ┆ null ┆ null │\n", - "│ ┆ 23:46:00 ┆ ┆ ┆ ┆ │\n", - "│ 10020740 ┆ 2150-09-17 ┆ true ┆ null ┆ null ┆ null │\n", - "│ ┆ 14:09:00 ┆ ┆ ┆ ┆ │\n", - "│ 10020740 ┆ 2151-01-17 ┆ true ┆ null ┆ null ┆ null │\n", - "│ ┆ 15:25:00 ┆ ┆ ┆ ┆ │\n", + "│ 10026406 ┆ 2129-01-05 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 15:55:00 ┆ ┆ ┆ ┆ │\n", + "│ 10035631 ┆ 2113-07-19 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 17:15:00 ┆ ┆ ┆ ┆ │\n", + "│ 10039708 ┆ 2140-09-27 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 04:17:00 ┆ ┆ ┆ ┆ │\n", + "│ 10039708 ┆ 2142-05-17 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 17:14:00 ┆ ┆ ┆ ┆ │\n", + "│ 10039708 ┆ 2142-07-08 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 09:08:00 ┆ ┆ ┆ ┆ │\n", "└────────────┴───────────────────┴───────────────┴───────────────┴─────────────┴───────────────────┘" ] }, - "execution_count": 62, + "execution_count": 5, 
"metadata": {}, "output_type": "execute_result" } @@ -397,22 +424,42 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 6, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sim/miniconda3/envs/dev/lib/python3.12/pty.py:95: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "In addition, using fork() with Python in general is a recipe for mysterious\n", + "deadlocks and crashes.\n", + "\n", + "The most likely reason you are seeing this error is because you are using the\n", + "multiprocessing module on Linux, which uses fork() by default. This will be\n", + "fixed in Python 3.14. Until then, you want to use the \"spawn\" context instead.\n", + "\n", + "See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.\n", + "\n", + "If you really know what your doing, you can silence this warning with the warning module\n", + "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", + "\n", + " pid, fd = os.forkpty()\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ "Running task mortality/in_icu/first_24h on dataset MIMIC-IV with MEDS_ROOT_DIR=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds and SHARDS=held_out/0,train/0,tuning/0\n", - "[2024-12-14 17:02:21,042][HYDRA] Launching 3 jobs locally\n", - "[2024-12-14 17:02:21,042][HYDRA] \t#0 : data.shard=held_out/0\n", - "\u001b[32m2024-12-14 17:02:21.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.190\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from 
'/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "[2024-12-14 17:33:48,126][HYDRA] Launching 3 jobs locally\n", + "[2024-12-14 17:33:48,126][HYDRA] \t#0 : data.shard=held_out/0\n", + "\u001b[32m2024-12-14 17:33:48.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.304\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", "root: ${oc.env:MEDS_ROOT_DIR}/data\n", @@ -420,38 +467,37 @@ "path: ${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 
17:02:21.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 17:33:48.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.332\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.344\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. 
Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┗━━ input.start\n", "┗━━ gap.end\n", " ┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 4,163 rows as they failed to satisfy '1 <= icu_admission <= 
None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.255\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 6 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 
6 valid rows returned corresponding to 4 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.303\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m142\u001b[0m - \u001b[33m\u001b[1mAll labels in the extracted cohort are the same: '0'. This may indicate an issue with the task logic. Please double-check your configuration file if this is not expected.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.306\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. 
For now, we are dropping them.\n", + "\u001b[32m2024-12-14 17:33:48.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 12,173 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.351\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", + 
"\u001b[32m2024-12-14 17:33:48.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 17 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.406\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 16 valid rows returned corresponding to 11 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.433\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -459,14 +505,14 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.209584. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/held_out/0.parquet'.\u001b[0m\n", - "[2024-12-14 17:02:21,399][HYDRA] \t#1 : data.shard=train/0\n", - "\u001b[32m2024-12-14 17:02:21.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "\u001b[32m2024-12-14 17:33:48.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.231224. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/held_out/0.parquet'.\u001b[0m\n", + "[2024-12-14 17:33:48,517][HYDRA] \t#1 : data.shard=train/0\n", + "\u001b[32m2024-12-14 17:33:48.590\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", "root: ${oc.env:MEDS_ROOT_DIR}/data\n", @@ -474,37 +520,37 @@ "path: ${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.487\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.531\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.564\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. 
Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 17:33:48.609\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.647\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┗━━ input.start\n", "┗━━ gap.end\n", " ┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.568\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 72,774 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.569\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.680\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 2 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.715\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 53 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 
17:02:21.715\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.804\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 60 valid rows returned corresponding to 47 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.804\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.805\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.806\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. 
For now, we are dropping them.\n", + "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 47,976 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.805\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 2 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", + 
"\u001b[32m2024-12-14 17:33:48.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 37 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 36 valid rows returned corresponding to 27 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.854\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -512,14 +558,14 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.340355. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/train/0.parquet'.\u001b[0m\n", - "[2024-12-14 17:02:21,810][HYDRA] \t#2 : data.shard=tuning/0\n", - "\u001b[32m2024-12-14 17:02:21.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "\u001b[32m2024-12-14 17:33:48.858\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.267562. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/train/0.parquet'.\u001b[0m\n", + "[2024-12-14 17:33:48,859][HYDRA] \t#2 : data.shard=tuning/0\n", + "\u001b[32m2024-12-14 17:33:48.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.932\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", "root: ${oc.env:MEDS_ROOT_DIR}/data\n", @@ -527,37 +573,37 @@ "path: ${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.896\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. 
Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 17:33:48.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.963\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.968\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┗━━ input.start\n", "┗━━ gap.end\n", " ┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 6,242 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 7 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 
17:02:21.932\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 8 valid rows returned corresponding to 5 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.946\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. 
For now, we are dropping them.\n", + "\u001b[32m2024-12-14 17:33:48.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 23,030 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:48.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:49.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:49.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", + 
"\u001b[32m2024-12-14 17:33:49.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 12 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:49.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:49.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 22 valid rows returned corresponding to 18 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:49.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:49.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 17:33:49.085\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -565,7 +611,7 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:02:21.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.071263. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/tuning/0.parquet'.\u001b[0m\n" + "\u001b[32m2024-12-14 17:33:49.089\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.157569. Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/tuning/0.parquet'.\u001b[0m\n" ] } ], @@ -576,7 +622,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -593,16 +639,16 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "train prevalence: 0.133\n", - "tuning prevalence: 0.125\n", - "held_out prevalence: 0.0\n" + "train prevalence: 0.167\n", + "tuning prevalence: 0.091\n", + "held_out prevalence: 0.062\n" ] }, { @@ -615,7 +661,7 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (74, 6)
subject_idprediction_timeboolean_valueinteger_valuefloat_valuecategorical_value
i64datetime[μs]booli64f64str
100128532176-11-27 02:34:49falsenullnullnull
100147292125-02-28 10:03:08falsenullnullnull
100167422178-07-04 22:45:00falsenullnullnull
100167422178-07-14 08:16:00falsenullnullnull
100167422178-07-23 08:19:00falsenullnullnull
100104712155-12-03 20:33:00truenullnullnull
100159312177-03-25 21:48:07truenullnullnull
100378612117-03-15 16:34:58truenullnullnull
100379752185-01-18 19:12:12truenullnullnull
100380812115-10-10 10:15:25truenullnullnull
" + "shape: (74, 6)
subject_idprediction_timeboolean_valueinteger_valuefloat_valuecategorical_value
i64datetime[μs]booli64f64str
100128532176-11-27 02:34:49falsenullnullnull
100147292125-02-28 10:03:08falsenullnullnull
100167422178-07-04 22:45:00falsenullnullnull
100167422178-07-14 08:16:00falsenullnullnull
100167422178-07-23 08:19:00falsenullnullnull
100104712155-12-03 20:33:00truenullnullnull
100159312177-03-25 21:48:07truenullnullnull
100378612117-03-15 16:34:58truenullnullnull
100047202186-11-13 19:55:00truenullnullnull
100379752185-01-18 19:12:12truenullnullnull
" ], "text/plain": [ "shape: (74, 6)\n", @@ -641,14 +687,14 @@ "│ ┆ 21:48:07 ┆ ┆ ┆ ┆ │\n", "│ 10037861 ┆ 2117-03-15 ┆ true ┆ null ┆ null ┆ null │\n", "│ ┆ 16:34:58 ┆ ┆ ┆ ┆ │\n", + "│ 10004720 ┆ 2186-11-13 ┆ true ┆ null ┆ null ┆ null │\n", + "│ ┆ 19:55:00 ┆ ┆ ┆ ┆ │\n", "│ 10037975 ┆ 2185-01-18 ┆ true ┆ null ┆ null ┆ null │\n", "│ ┆ 19:12:12 ┆ ┆ ┆ ┆ │\n", - "│ 10038081 ┆ 2115-10-10 ┆ true ┆ null ┆ null ┆ null │\n", - "│ ┆ 10:15:25 ┆ ┆ ┆ ┆ │\n", "└────────────┴───────────────────┴───────────────┴───────────────┴─────────────┴───────────────────┘" ] }, - "execution_count": 67, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } diff --git a/demo/extract_meds_data.ipynb b/demo/extract_meds_data.ipynb index f6c55b6..ac77546 100644 --- a/demo/extract_meds_data.ipynb +++ b/demo/extract_meds_data.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 14, + "execution_count": 36, "metadata": { "id": "ikPVQZOnPcI0" }, @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 37, "metadata": { "collapsed": true, "id": "rjqK4CuRPfnE" @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 39, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -69,8 +69,8 @@ "remote: Enumerating objects: 144, done.\u001b[K\n", "remote: Counting objects: 100% (144/144), done.\u001b[K\n", "remote: Compressing objects: 100% (129/129), done.\u001b[K\n", - "remote: Total 144 (delta 22), reused 69 (delta 7), pack-reused 0 (from 0)\u001b[K\n", - "Receiving objects: 100% (144/144), 211.41 KiB | 979.00 KiB/s, done.\n", + "remote: Total 144 (delta 22), reused 70 (delta 7), pack-reused 0 (from 0)\u001b[K\n", + "Receiving objects: 100% (144/144), 211.41 KiB | 571.00 KiB/s, done.\n", "Resolving deltas: 100% (22/22), done.\n", "usage: cp [-R [-H | -L | 
-P]] [-fi | -n] [-aclpSsvXx] source_file target_file\n", " cp [-R [-H | -L | -P]] [-fi | -n] [-aclpSsvXx] source_file ... target_directory\n" @@ -87,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 40, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -100,225 +100,225 @@ "name": "stdout", "output_type": "stream", "text": [ - "--2024-12-14 16:06:48-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/d_labitems_to_loinc.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:14-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/d_labitems_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 361048 (353K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/d_labitems_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 352.59K 664KB/s in 0.5s \n", + "/Users/sim/Document 100%[===================>] 352.59K 1.04MB/s in 0.3s \n", "\n", - "2024-12-14 16:06:49 (664 KB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/d_labitems_to_loinc.csv’ saved [361048/361048]\n", + "2024-12-14 17:15:14 (1.04 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/d_labitems_to_loinc.csv’ saved [361048/361048]\n", "\n", - "--2024-12-14 16:06:49-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/d_labitems_to_loinc.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:15-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/d_labitems_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 361048 (353K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//d_labitems_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 352.59K 942KB/s in 0.4s \n", + "/Users/sim/Document 100%[===================>] 352.59K 561KB/s in 0.6s \n", "\n", - "2024-12-14 16:06:49 (942 KB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//d_labitems_to_loinc.csv’ saved [361048/361048]\n", + "2024-12-14 17:15:15 (561 KB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//d_labitems_to_loinc.csv’ saved [361048/361048]\n", "\n", - "--2024-12-14 16:06:50-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/inputevents_to_rxnorm.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:16-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/inputevents_to_rxnorm.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 79195 (77K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/inputevents_to_rxnorm.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 77.34K --.-KB/s in 0.009s \n", + "/Users/sim/Document 100%[===================>] 77.34K 109KB/s in 0.7s \n", "\n", - "2024-12-14 16:06:50 (8.44 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/inputevents_to_rxnorm.csv’ saved [79195/79195]\n", + "2024-12-14 17:15:17 (109 KB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/inputevents_to_rxnorm.csv’ saved [79195/79195]\n", "\n", - "--2024-12-14 16:06:51-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/inputevents_to_rxnorm.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:17-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/inputevents_to_rxnorm.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 79195 (77K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//inputevents_to_rxnorm.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 77.34K --.-KB/s in 0.008s \n", + "/Users/sim/Document 100%[===================>] 77.34K --.-KB/s in 0.009s \n", "\n", - "2024-12-14 16:06:51 (9.16 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//inputevents_to_rxnorm.csv’ saved [79195/79195]\n", + "2024-12-14 17:15:17 (8.59 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//inputevents_to_rxnorm.csv’ saved [79195/79195]\n", "\n", - "--2024-12-14 16:06:51-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/lab_itemid_to_loinc.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:18-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/lab_itemid_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 79970 (78K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/lab_itemid_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 78.10K --.-KB/s in 0.01s \n", + "/Users/sim/Document 100%[===================>] 78.10K --.-KB/s in 0.08s \n", "\n", - "2024-12-14 16:06:51 (6.67 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/lab_itemid_to_loinc.csv’ saved [79970/79970]\n", + "2024-12-14 17:15:18 (986 KB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/lab_itemid_to_loinc.csv’ saved [79970/79970]\n", "\n", - "--2024-12-14 16:06:52-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/lab_itemid_to_loinc.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:18-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/lab_itemid_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 79970 (78K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//lab_itemid_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 78.10K --.-KB/s in 0.007s \n", + "/Users/sim/Document 100%[===================>] 78.10K --.-KB/s in 0.01s \n", "\n", - "2024-12-14 16:06:52 (11.4 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//lab_itemid_to_loinc.csv’ saved [79970/79970]\n", + "2024-12-14 17:15:18 (5.33 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//lab_itemid_to_loinc.csv’ saved [79970/79970]\n", "\n", - "--2024-12-14 16:06:52-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_main.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:19-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_main.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 34862 (34K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_main.csv’\n", "\n", "/Users/sim/Document 100%[===================>] 34.04K --.-KB/s in 0.003s \n", "\n", - "2024-12-14 16:06:52 (10.1 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_main.csv’ saved [34862/34862]\n", + "2024-12-14 17:15:19 (9.53 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_main.csv’ saved [34862/34862]\n", "\n", - "--2024-12-14 16:06:53-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_main.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:19-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_main.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 34862 (34K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_main.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 34.04K --.-KB/s in 0.002s \n", + "/Users/sim/Document 100%[===================>] 34.04K --.-KB/s in 0.004s \n", "\n", - "2024-12-14 16:06:53 (17.4 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_main.csv’ saved [34862/34862]\n", + "2024-12-14 17:15:19 (7.46 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_main.csv’ saved [34862/34862]\n", "\n", - "--2024-12-14 16:06:53-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_value.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:20-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_value.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 5902 (5.8K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_value.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 5.76K --.-KB/s in 0.001s \n", + "/Users/sim/Document 100%[===================>] 5.76K --.-KB/s in 0.002s \n", "\n", - "2024-12-14 16:06:54 (8.35 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_value.csv’ saved [5902/5902]\n", + "2024-12-14 17:15:20 (3.64 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_value.csv’ saved [5902/5902]\n", "\n", - "--2024-12-14 16:06:54-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_value.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:20-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_value.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 5902 (5.8K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_value.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 5.76K --.-KB/s in 0s \n", + "/Users/sim/Document 100%[===================>] 5.76K --.-KB/s in 0.001s \n", "\n", - "2024-12-14 16:06:54 (49.4 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_value.csv’ saved [5902/5902]\n", + "2024-12-14 17:15:20 (3.79 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_value.csv’ saved [5902/5902]\n", "\n", - "--2024-12-14 16:06:54-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/numerics-summary.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:21-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/numerics-summary.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 32353 (32K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/numerics-summary.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 31.59K --.-KB/s in 0.001s \n", + "/Users/sim/Document 100%[===================>] 31.59K --.-KB/s in 0.002s \n", "\n", - "2024-12-14 16:06:55 (27.5 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/numerics-summary.csv’ saved [32353/32353]\n", + "2024-12-14 17:15:21 (12.5 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/numerics-summary.csv’ saved [32353/32353]\n", "\n", - "--2024-12-14 16:06:55-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/numerics-summary.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:21-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/numerics-summary.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 32353 (32K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//numerics-summary.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 31.59K --.-KB/s in 0.001s \n", + "/Users/sim/Document 100%[===================>] 31.59K --.-KB/s in 0.004s \n", "\n", - "2024-12-14 16:06:55 (61.1 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//numerics-summary.csv’ saved [32353/32353]\n", + "2024-12-14 17:15:21 (7.26 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//numerics-summary.csv’ saved [32353/32353]\n", "\n", - "--2024-12-14 16:06:56-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/outputevents_to_loinc.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:22-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/outputevents_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 34008 (33K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/outputevents_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 33.21K --.-KB/s in 0.001s \n", + "/Users/sim/Document 100%[===================>] 33.21K --.-KB/s in 0.006s \n", "\n", - "2024-12-14 16:06:56 (22.2 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/outputevents_to_loinc.csv’ saved [34008/34008]\n", + "2024-12-14 17:15:22 (5.46 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/outputevents_to_loinc.csv’ saved [34008/34008]\n", "\n", - "--2024-12-14 16:06:56-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/outputevents_to_loinc.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:22-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/outputevents_to_loinc.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 34008 (33K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//outputevents_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 33.21K --.-KB/s in 0s \n", + "/Users/sim/Document 100%[===================>] 33.21K --.-KB/s in 0.004s \n", "\n", - "2024-12-14 16:06:56 (133 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//outputevents_to_loinc.csv’ saved [34008/34008]\n", + "2024-12-14 17:15:23 (9.19 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//outputevents_to_loinc.csv’ saved [34008/34008]\n", "\n", - "--2024-12-14 16:06:57-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_datetimeevents.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:23-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_datetimeevents.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 25205 (25K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_datetimeevents.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 24.61K --.-KB/s in 0.001s \n", + "/Users/sim/Document 100%[===================>] 24.61K --.-KB/s in 0.002s \n", "\n", - "2024-12-14 16:06:57 (28.1 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_datetimeevents.csv’ saved [25205/25205]\n", + "2024-12-14 17:15:23 (10.2 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_datetimeevents.csv’ saved [25205/25205]\n", "\n", - "--2024-12-14 16:06:57-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_datetimeevents.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:24-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_datetimeevents.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 25205 (25K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_datetimeevents.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 24.61K --.-KB/s in 0.006s \n", + "/Users/sim/Document 100%[===================>] 24.61K --.-KB/s in 0.004s \n", "\n", - "2024-12-14 16:06:57 (3.71 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_datetimeevents.csv’ saved [25205/25205]\n", + "2024-12-14 17:15:24 (6.67 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_datetimeevents.csv’ saved [25205/25205]\n", "\n", - "--2024-12-14 16:06:58-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_itemid.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:24-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_itemid.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 21414 (21K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_itemid.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 20.91K --.-KB/s in 0.004s \n", + "/Users/sim/Document 100%[===================>] 20.91K --.-KB/s in 0.003s \n", "\n", - "2024-12-14 16:06:58 (4.98 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_itemid.csv’ saved [21414/21414]\n", + "2024-12-14 17:15:24 (8.16 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_itemid.csv’ saved [21414/21414]\n", "\n", - "--2024-12-14 16:06:59-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_itemid.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:24-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_itemid.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 21414 (21K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_itemid.csv’\n", "\n", "/Users/sim/Document 100%[===================>] 20.91K --.-KB/s in 0.002s \n", "\n", - "2024-12-14 16:06:59 (10.4 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_itemid.csv’ saved [21414/21414]\n", + "2024-12-14 17:15:25 (8.29 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_itemid.csv’ saved [21414/21414]\n", "\n", - "--2024-12-14 16:06:59-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/waveforms-summary.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:25-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/waveforms-summary.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 5743 (5.6K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/waveforms-summary.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 5.61K --.-KB/s in 0s \n", + "/Users/sim/Document 100%[===================>] 5.61K --.-KB/s in 0.001s \n", "\n", - "2024-12-14 16:06:59 (30.4 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/waveforms-summary.csv’ saved [5743/5743]\n", + "2024-12-14 17:15:25 (4.44 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/waveforms-summary.csv’ saved [5743/5743]\n", "\n", - "--2024-12-14 16:07:00-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/waveforms-summary.csv\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "--2024-12-14 17:15:25-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/waveforms-summary.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 5743 (5.6K) [text/plain]\n", "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//waveforms-summary.csv’\n", "\n", "/Users/sim/Document 100%[===================>] 5.61K --.-KB/s in 0s \n", "\n", - "2024-12-14 16:07:00 (37.3 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//waveforms-summary.csv’ saved [5743/5743]\n", + "2024-12-14 17:15:25 (53.7 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//waveforms-summary.csv’ saved [5743/5743]\n", "\n" ] } @@ -351,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -360,7 +360,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 42, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -378,95 +378,95 @@ "Setting DO_UNZIP=true\n", "Unzipping csv.gz files matching /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/*/*.csv.gz.\n", "Running pre-MEDS conversion.\n", - "\u001b[32m2024-12-14 16:07:02.391\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe_detail.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/poe_detail: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe_detail.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe_detail.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.392\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/provider.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.392\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/provider: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/provider.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/provider.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.393\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/pharmacy: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m243\u001b[0m - \u001b[1mSkipping hosp/index @ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/index.html as no compatible dataframe file was found.\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.393\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/emar: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.394\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/microbiologyevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/microbiologyevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/microbiologyevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/microbiologyevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.395\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - 
\u001b[1mNo function needed for hosp/labevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.395\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/admissions: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.396\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_labitems.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/d_labitems: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_labitems.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_labitems.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.396\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/prescriptions.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/prescriptions: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/prescriptions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/prescriptions.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.397\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/procedures_icd: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.398\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.398\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/poe: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.398\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/d_hcpcs: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.399\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/omr: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.399\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/transfers: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.400\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/diagnoses_icd.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.400\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/services.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/services: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/services.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/services.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.401\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/hcpcsevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.401\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/drgcodes.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.402\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/patients.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.402\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_diagnoses.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.402\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_procedures.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.403\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar_detail.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/emar_detail: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar_detail.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar_detail.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m243\u001b[0m - \u001b[1mSkipping icu/index @ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/index.html as no compatible dataframe file was found.\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.404\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/d_items.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/d_items: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/d_items.csv to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/d_items.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.404\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/procedureevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.405\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/inputevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.406\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/datetimeevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.406\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/datetimeevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/datetimeevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/datetimeevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.406\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/ingredientevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/ingredientevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/ingredientevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/ingredientevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.407\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/chartevents: Symlinking 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.408\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/caregiver.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/caregiver: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/caregiver.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/caregiver.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.408\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/outputevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.409\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - 
\u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/icustays: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.409\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m300\u001b[0m - \u001b[1mLoading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv for manipulating other dataframes...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m305\u001b[0m - \u001b[1m Loaded in 0:00:00.001291\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/diagnoses_icd...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/diagnoses_icd.csv...\u001b[0m\n", - 
"\u001b[32m2024-12-14 16:07:02.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000166\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet in 0:00:00.068729\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/drgcodes...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/drgcodes.csv...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000264\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet in 0:00:00.004873\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/patients...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/patients.csv...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000171\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet in 0:00:00.007117\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.492\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_diagnoses.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.492\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m334\u001b[0m - \u001b[1mProcessing hosp/d_icd_diagnoses...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet in 0:00:00.087729\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.580\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_procedures.csv\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.580\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m334\u001b[0m - \u001b[1mProcessing hosp/d_icd_procedures...\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.633\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet in 0:00:00.052624\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:02.633\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mDone! All dataframes processed and written to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.980\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe_detail.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/poe_detail: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe_detail.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe_detail.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.981\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/provider.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.981\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/provider: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/provider.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/provider.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.982\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/pharmacy: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m243\u001b[0m - \u001b[1mSkipping hosp/index @ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/index.html as no compatible dataframe file was found.\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.982\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function 
needed for hosp/emar: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.983\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/microbiologyevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/microbiologyevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/microbiologyevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/microbiologyevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.984\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/labevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.984\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/admissions: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.985\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_labitems.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/d_labitems: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_labitems.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_labitems.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.985\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/prescriptions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - 
\u001b[1mNo function needed for hosp/prescriptions: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/prescriptions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/prescriptions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.986\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/procedures_icd: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.987\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/poe: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.987\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/d_hcpcs: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.988\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/omr: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.989\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/transfers: 
Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.989\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/diagnoses_icd.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.989\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/services.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/services: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/services.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/services.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.990\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/hcpcsevents: Symlinking 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.991\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/drgcodes.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.991\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/patients.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.991\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_diagnoses.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.992\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_procedures.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.992\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar_detail.csv\u001b[0m\n", + 
"\u001b[32m2024-12-14 17:15:27.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/emar_detail: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar_detail.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar_detail.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m243\u001b[0m - \u001b[1mSkipping icu/index @ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/index.html as no compatible dataframe file was found.\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.993\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/d_items.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/d_items: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/d_items.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/d_items.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.994\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.994\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/procedureevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.995\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/inputevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.995\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/datetimeevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/datetimeevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/datetimeevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/datetimeevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 
17:15:27.996\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/ingredientevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/ingredientevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/ingredientevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/ingredientevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.996\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/chartevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.997\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/caregiver.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.997\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/caregiver: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/caregiver.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/caregiver.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.998\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/outputevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.998\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/icustays: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.999\u001b[0m | \u001b[34m\u001b[1mDEBUG 
\u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:27.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m300\u001b[0m - \u001b[1mLoading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv for manipulating other dataframes...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m305\u001b[0m - \u001b[1m Loaded in 0:00:00.001182\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/patients...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/patients.csv...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000155\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet in 0:00:00.061956\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m 
Processing dependent df @ hosp/drgcodes...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/drgcodes.csv...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.063\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000196\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet in 0:00:00.004755\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/diagnoses_icd...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/diagnoses_icd.csv...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000183\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet in 0:00:00.005889\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.073\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_diagnoses.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m334\u001b[0m - \u001b[1mProcessing hosp/d_icd_diagnoses...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.161\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet in 0:00:00.087207\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.161\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_procedures.csv\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.161\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m334\u001b[0m - \u001b[1mProcessing hosp/d_icd_procedures...\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet in 0:00:00.053073\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mDone! 
All dataframes processed and written to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds\u001b[0m\n", "Setting N_WORKERS to 1 to avoid issues with the runners.\n", "Running extraction pipeline.\n", - "\u001b[32m2024-12-14 16:07:03.167\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: shard_events\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:03.187\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-shard_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=shard_events\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:06.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:15:28.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: shard_events\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:28.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-shard_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=shard_events\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:31.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:06.436\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:07:03.746 | INFO | MEDS_transforms.extract.shard_events:main:330 - Running with config:\n", + "\u001b[32m2024-12-14 17:15:31.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:15:29.323 | INFO | MEDS_transforms.extract.shard_events:main:330 - Running with config:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -495,9 +495,9 @@ " n_subjects_per_shard: 1000\n", " external_splits_json_fp: null\n", " split_fracs:\n", - " train: 0.8\n", - " tuning: 0.1\n", - " held_out: 0.1\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", " convert_to_sharded_events:\n", " do_dedup_text_and_numeric: true\n", " merge_to_MEDS_cohort:\n", @@ -513,10 +513,6 @@ " do_retype: true\n", " finalize_MEDS_data:\n", " do_retype: true\n", - " split_fracs:\n", - " train: 0.5\n", - " tuning: 0.25\n", - " held_out: 0.25\n", "worker: 0\n", "polling_time: 300\n", "stage: shard_events\n", @@ -550,368 +546,368 @@ "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", "reducer_output_dir: null\n", "\n", - "2024-12-14 16:07:03.751 | INFO | MEDS_transforms.extract.shard_events:main:342 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml to identify needed columns.\n", - "2024-12-14 16:07:03.781 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet as it is not specified in the event conversion 
configuration.\n", - "2024-12-14 16:07:03.782 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.782 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/microbiologyevents.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/caregiver.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/services.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar_detail.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.783 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/numerics-summary.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/prescriptions.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.784 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/meas_chartevents_main.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.785 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe_detail.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.785 | WARNING | 
MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/ingredientevents.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.785 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/datetimeevents.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.785 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.785 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_labitems.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.785 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/meas_chartevents_value.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.786 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/d_items.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.786 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/provider.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.786 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/lab_itemid_to_loinc.csv as it is not specified in the 
event conversion configuration.\n", - "2024-12-14 16:07:03.786 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/waveforms-summary.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 16:07:03.787 | INFO | MEDS_transforms.extract.shard_events:main:368 - Starting event sub-sharding. Sub-sharding 16 files:\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv\n", + "2024-12-14 17:15:29.328 | INFO | MEDS_transforms.extract.shard_events:main:342 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml to identify needed columns.\n", + "2024-12-14 17:15:29.358 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.358 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.359 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/ingredientevents.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.360 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/prescriptions.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.360 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.360 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_labitems.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/numerics-summary.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/lab_itemid_to_loinc.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/microbiologyevents.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 
17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/provider.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/datetimeevents.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/waveforms-summary.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/meas_chartevents_value.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/services.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/d_items.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe.csv as it is not 
specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe_detail.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/meas_chartevents_main.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.363 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/caregiver.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.363 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar_detail.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.363 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.363 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 17:15:29.364 | INFO | MEDS_transforms.extract.shard_events:main:368 - Starting event sub-sharding. 
Sub-sharding 16 files:\n", " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv\n", " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv\n", " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv\n", " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv\n", + " * 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv\n", " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv\n", - "2024-12-14 16:07:03.790 | INFO | MEDS_transforms.extract.shard_events:main:372 - Will read raw data from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/$IN_FILE.parquet and write sub-sharded data to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/$IN_FILE/$ROW_START-$ROW_END.parquet\n", - "2024-12-14 16:07:03.792 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents.\n", - "2024-12-14 16:07:03.792 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv to determine row count.\n", - "2024-12-14 16:07:03.794 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.366 | INFO | MEDS_transforms.extract.shard_events:main:372 - Will read raw data from 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/$IN_FILE.parquet and write sub-sharded data to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/$IN_FILE/$ROW_START-$ROW_END.parquet\n", + "2024-12-14 17:15:29.369 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients.\n", + "2024-12-14 17:15:29.369 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet to determine row count.\n", + "2024-12-14 17:15:29.371 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:15:29.371 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:15:29.372 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dod', 'gender', 'subject_id', 'year_of_birth']\n", + "2024-12-14 17:15:29.378 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dod, gender, subject_id, year_of_birth\n", + "2024-12-14 17:15:29.384 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 100 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet.\n", + "2024-12-14 17:15:29.384 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.384 
| INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet row-chunk [0-100) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet.\n", + "2024-12-14 17:15:29.387 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.387587. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.389 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet\n", + "2024-12-14 17:15:29.389 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:15:29.389 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:15:29.389 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dod', 'gender', 'subject_id', 'year_of_birth']\n", + "2024-12-14 17:15:29.389 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dod, gender, subject_id, year_of_birth\n", + "2024-12-14 17:15:29.389 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.390 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:29.398 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011062\n", + "2024-12-14 17:15:29.398 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T17:15:29.387587.json\n", + "2024-12-14 17:15:29.401 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes.\n", + "2024-12-14 17:15:29.401 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet to determine row count.\n", + "2024-12-14 17:15:29.403 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:15:29.403 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:15:29.404 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['description', 'drg_code', 'drg_mortality', 'drg_severity', 'drg_type', 'hadm_discharge_time', 'hadm_id', 'subject_id']\n", + "2024-12-14 17:15:29.404 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, description, drg_code, drg_mortality, drg_severity, drg_type, hadm_discharge_time, hadm_id, subject_id\n", + "2024-12-14 17:15:29.404 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 454 rows from 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet.\n", + "2024-12-14 17:15:29.404 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.404 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet row-chunk [0-454) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet.\n", + "2024-12-14 17:15:29.407 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.407233. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.407 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet\n", + "2024-12-14 17:15:29.407 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:15:29.407 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:15:29.407 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['description', 'drg_code', 'drg_mortality', 'drg_severity', 'drg_type', 'hadm_discharge_time', 'hadm_id', 'subject_id']\n", + "2024-12-14 17:15:29.407 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, description, drg_code, drg_mortality, drg_severity, drg_type, hadm_discharge_time, hadm_id, subject_id\n", + "2024-12-14 17:15:29.407 | 
INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.408 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:29.410 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003108\n", + "2024-12-14 17:15:29.410 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T17:15:29.407233.json\n", + "2024-12-14 17:15:29.412 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd.\n", + "2024-12-14 17:15:29.413 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv to determine row count.\n", + "2024-12-14 17:15:29.415 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:03.795 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valuenum', 'valueuom']\n", - "2024-12-14 16:07:04.301 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - 
Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valuenum, valueuom\n", - "2024-12-14 16:07:04.337 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 668862 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv.\n", - "2024-12-14 16:07:04.337 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:04.337 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv row-chunk [0-668862) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet.\n", - "2024-12-14 16:07:04.343 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:04.343324. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:04.345 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv\n", - "2024-12-14 16:07:04.345 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.415 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", + "2024-12-14 17:15:29.419 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, icd_code, icd_version, subject_id\n", + "2024-12-14 17:15:29.420 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 722 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv.\n", + "2024-12-14 17:15:29.420 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.420 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv row-chunk [0-722) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet.\n", + "2024-12-14 17:15:29.423 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.423035. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.423 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv\n", + "2024-12-14 17:15:29.423 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:04.345 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valuenum', 'valueuom']\n", - "2024-12-14 16:07:05.156 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valuenum, valueuom\n", - "2024-12-14 16:07:05.156 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.156 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:05.364 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:01.021167\n", - "2024-12-14 16:07:05.364 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T16:07:04.343324.json\n", - "2024-12-14 16:07:05.367 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar.\n", - "2024-12-14 16:07:05.367 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv to determine row count.\n", - "2024-12-14 16:07:05.369 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.423 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", + "2024-12-14 17:15:29.423 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, icd_code, icd_version, subject_id\n", + "2024-12-14 17:15:29.423 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.424 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:29.427 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.004191\n", + "2024-12-14 17:15:29.427 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T17:15:29.423035.json\n", + "2024-12-14 17:15:29.429 | INFO | 
MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents.\n", + "2024-12-14 17:15:29.429 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv to determine row count.\n", + "2024-12-14 17:15:29.431 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.370 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'emar_id', 'emar_seq', 'event_txt', 'hadm_id', 'medication', 'subject_id']\n", - "2024-12-14 16:07:05.418 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, emar_id, emar_seq, event_txt, hadm_id, medication, subject_id\n", - "2024-12-14 16:07:05.422 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 35835 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv.\n", - "2024-12-14 16:07:05.422 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.422 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv row-chunk [0-35835) to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet.\n", - "2024-12-14 16:07:05.425 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.425385. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.425 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv\n", - "2024-12-14 16:07:05.425 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.432 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valueuom']\n", + "2024-12-14 17:15:29.438 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valueuom\n", + "2024-12-14 17:15:29.439 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 9362 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv.\n", + "2024-12-14 17:15:29.439 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.439 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv row-chunk [0-9362) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet.\n", + "2024-12-14 17:15:29.442 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.442133. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.442 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv\n", + "2024-12-14 17:15:29.442 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.425 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'emar_id', 'emar_seq', 'event_txt', 'hadm_id', 'medication', 'subject_id']\n", - "2024-12-14 16:07:05.475 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, emar_id, emar_seq, event_txt, hadm_id, medication, subject_id\n", - "2024-12-14 16:07:05.475 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.475 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:05.514 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.089048\n", - "2024-12-14 16:07:05.514 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T16:07:05.425385.json\n", - "2024-12-14 16:07:05.517 | INFO | 
MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions.\n", - "2024-12-14 16:07:05.517 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv to determine row count.\n", - "2024-12-14 16:07:05.519 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.442 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valueuom']\n", + "2024-12-14 17:15:29.448 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valueuom\n", + "2024-12-14 17:15:29.449 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.449 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:29.470 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.028809\n", + "2024-12-14 17:15:29.471 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T17:15:29.442133.json\n", + "2024-12-14 17:15:29.473 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays.\n", + "2024-12-14 17:15:29.474 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv to determine row count.\n", + "2024-12-14 17:15:29.486 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.519 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admission_location', 'admission_type', 'admittime', 'discharge_location', 'dischtime', 'edouttime', 'edregtime', 'hadm_id', 'insurance', 'language', 'marital_status', 'race', 'subject_id']\n", - "2024-12-14 16:07:05.520 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admission_location, admission_type, admittime, discharge_location, dischtime, edouttime, edregtime, hadm_id, insurance, language, marital_status, race, subject_id\n", - "2024-12-14 16:07:05.521 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 275 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv.\n", - "2024-12-14 16:07:05.521 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.521 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv row-chunk [0-275) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet.\n", - "2024-12-14 16:07:05.523 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.523672. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.524 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv\n", - "2024-12-14 16:07:05.524 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.487 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['first_careunit', 'hadm_id', 'intime', 'last_careunit', 'outtime', 'stay_id', 'subject_id']\n", + "2024-12-14 17:15:29.489 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, first_careunit, hadm_id, intime, last_careunit, outtime, stay_id, subject_id\n", + "2024-12-14 17:15:29.490 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 140 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv.\n", + "2024-12-14 17:15:29.490 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv into 1 row-chunks of size 
200000000.\n", + "2024-12-14 17:15:29.491 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv row-chunk [0-140) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet.\n", + "2024-12-14 17:15:29.498 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.498163. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.498 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv\n", + "2024-12-14 17:15:29.498 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.524 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admission_location', 'admission_type', 'admittime', 'discharge_location', 'dischtime', 'edouttime', 'edregtime', 'hadm_id', 'insurance', 'language', 'marital_status', 'race', 'subject_id']\n", - "2024-12-14 16:07:05.524 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admission_location, admission_type, admittime, discharge_location, dischtime, edouttime, edregtime, hadm_id, insurance, language, marital_status, race, subject_id\n", - "2024-12-14 16:07:05.524 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.525 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", - "2024-12-14 
16:07:05.528 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005136\n", - "2024-12-14 16:07:05.528 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/.[0-275).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T16:07:05.523672.json\n", - "2024-12-14 16:07:05.531 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents.\n", - "2024-12-14 16:07:05.531 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv to determine row count.\n", - "2024-12-14 16:07:05.534 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.499 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['first_careunit', 'hadm_id', 'intime', 'last_careunit', 'outtime', 'stay_id', 'subject_id']\n", + "2024-12-14 17:15:29.499 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, first_careunit, hadm_id, intime, last_careunit, outtime, stay_id, subject_id\n", + "2024-12-14 17:15:29.499 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.500 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:29.504 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005838\n", + "2024-12-14 17:15:29.504 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T17:15:29.498163.json\n", + "2024-12-14 17:15:29.506 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents.\n", + "2024-12-14 17:15:29.506 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv to determine row count.\n", + "2024-12-14 17:15:29.509 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.534 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valueuom']\n", - "2024-12-14 16:07:05.544 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valueuom\n", - "2024-12-14 16:07:05.546 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 
9362 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv.\n", - "2024-12-14 16:07:05.546 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.546 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv row-chunk [0-9362) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet.\n", - "2024-12-14 16:07:05.548 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.548505. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.548 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv\n", - "2024-12-14 16:07:05.548 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.509 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['endtime', 'hadm_id', 'itemid', 'starttime', 'stay_id', 'subject_id']\n", + "2024-12-14 17:15:29.511 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, endtime, hadm_id, itemid, starttime, stay_id, subject_id\n", + "2024-12-14 17:15:29.512 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1468 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv.\n", + "2024-12-14 
17:15:29.512 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.512 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv row-chunk [0-1468) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet.\n", + "2024-12-14 17:15:29.515 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.515574. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.515 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv\n", + "2024-12-14 17:15:29.516 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.549 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valueuom']\n", - "2024-12-14 16:07:05.559 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valueuom\n", - "2024-12-14 16:07:05.559 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.560 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:05.583 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.034846\n", - "2024-12-14 16:07:05.583 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T16:07:05.548505.json\n", - "2024-12-14 16:07:05.586 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents.\n", - "2024-12-14 16:07:05.586 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv to determine row count.\n", - "2024-12-14 16:07:05.588 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.516 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['endtime', 'hadm_id', 'itemid', 'starttime', 'stay_id', 'subject_id']\n", + "2024-12-14 17:15:29.518 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, endtime, hadm_id, itemid, starttime, stay_id, subject_id\n", + "2024-12-14 17:15:29.518 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.518 | 
INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:29.527 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011919\n", + "2024-12-14 17:15:29.527 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T17:15:29.515574.json\n", + "2024-12-14 17:15:29.530 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar.\n", + "2024-12-14 17:15:29.530 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv to determine row count.\n", + "2024-12-14 17:15:29.532 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.588 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['endtime', 'hadm_id', 'itemid', 'starttime', 'stay_id', 'subject_id']\n", - "2024-12-14 16:07:05.592 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, endtime, hadm_id, itemid, starttime, stay_id, subject_id\n", - "2024-12-14 16:07:05.593 | INFO | 
MEDS_transforms.extract.shard_events:main:405 - Read 1468 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv.\n", - "2024-12-14 16:07:05.593 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.593 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv row-chunk [0-1468) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet.\n", - "2024-12-14 16:07:05.595 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.595809. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.596 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv\n", - "2024-12-14 16:07:05.596 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.532 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'emar_id', 'emar_seq', 'event_txt', 'hadm_id', 'medication', 'subject_id']\n", + "2024-12-14 17:15:29.562 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, emar_id, emar_seq, event_txt, hadm_id, medication, subject_id\n", + "2024-12-14 17:15:29.565 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 35835 rows from 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv.\n", + "2024-12-14 17:15:29.566 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.566 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv row-chunk [0-35835) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet.\n", + "2024-12-14 17:15:29.568 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.568485. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.568 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv\n", + "2024-12-14 17:15:29.568 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.596 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['endtime', 'hadm_id', 'itemid', 'starttime', 'stay_id', 'subject_id']\n", - "2024-12-14 16:07:05.600 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, endtime, hadm_id, itemid, starttime, stay_id, subject_id\n", - "2024-12-14 16:07:05.600 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.600 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:05.607 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012174\n", - "2024-12-14 16:07:05.608 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T16:07:05.595809.json\n", - "2024-12-14 16:07:05.610 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients.\n", - "2024-12-14 16:07:05.610 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet to determine row count.\n", - "2024-12-14 16:07:05.612 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 16:07:05.612 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet as Parquet with kwargs:\n", - ".\n", - "2024-12-14 16:07:05.612 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dod', 'gender', 'subject_id', 'year_of_birth']\n", - "2024-12-14 16:07:05.614 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dod, gender, subject_id, year_of_birth\n", - "2024-12-14 16:07:05.617 | INFO | 
MEDS_transforms.extract.shard_events:main:405 - Read 100 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet.\n", - "2024-12-14 16:07:05.617 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.617 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet row-chunk [0-100) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet.\n", - "2024-12-14 16:07:05.620 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.619958. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.620 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet\n", - "2024-12-14 16:07:05.620 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 16:07:05.620 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet as Parquet with kwargs:\n", - ".\n", - "2024-12-14 16:07:05.620 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dod', 'gender', 'subject_id', 'year_of_birth']\n", - "2024-12-14 16:07:05.620 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dod, gender, subject_id, year_of_birth\n", - "2024-12-14 16:07:05.620 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read 
dataset\n", - "2024-12-14 16:07:05.620 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:05.622 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.002234\n", - "2024-12-14 16:07:05.622 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T16:07:05.619958.json\n", - "2024-12-14 16:07:05.624 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers.\n", - "2024-12-14 16:07:05.624 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv to determine row count.\n", - "2024-12-14 16:07:05.626 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.569 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'emar_id', 'emar_seq', 'event_txt', 'hadm_id', 'medication', 'subject_id']\n", + "2024-12-14 17:15:29.600 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, emar_id, emar_seq, event_txt, hadm_id, medication, subject_id\n", + 
"2024-12-14 17:15:29.600 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.600 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:29.640 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.071693\n", + "2024-12-14 17:15:29.640 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T17:15:29.568485.json\n", + "2024-12-14 17:15:29.643 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers.\n", + "2024-12-14 17:15:29.643 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv to determine row count.\n", + "2024-12-14 17:15:29.645 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.626 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['careunit', 'eventtype', 'hadm_id', 'intime', 'subject_id']\n", - "2024-12-14 16:07:05.628 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: 
__row_idx__, careunit, eventtype, hadm_id, intime, subject_id\n", - "2024-12-14 16:07:05.628 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1190 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv.\n", - "2024-12-14 16:07:05.628 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.628 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv row-chunk [0-1190) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet.\n", - "2024-12-14 16:07:05.631 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.631167. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.631 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv\n", - "2024-12-14 16:07:05.631 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.645 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['careunit', 'eventtype', 'hadm_id', 'intime', 'subject_id']\n", + "2024-12-14 17:15:29.646 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, careunit, eventtype, hadm_id, intime, subject_id\n", + "2024-12-14 17:15:29.647 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1190 rows from 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv.\n", + "2024-12-14 17:15:29.647 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.647 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv row-chunk [0-1190) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet.\n", + "2024-12-14 17:15:29.650 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.649856. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.650 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv\n", + "2024-12-14 17:15:29.650 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.631 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['careunit', 'eventtype', 'hadm_id', 'intime', 'subject_id']\n", - "2024-12-14 16:07:05.632 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, careunit, eventtype, hadm_id, intime, subject_id\n", - "2024-12-14 16:07:05.632 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.633 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:05.636 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005445\n", - "2024-12-14 16:07:05.636 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T16:07:05.631167.json\n", - "2024-12-14 16:07:05.639 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd.\n", - "2024-12-14 16:07:05.639 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv to determine row count.\n", - "2024-12-14 16:07:05.641 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.650 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['careunit', 'eventtype', 'hadm_id', 'intime', 'subject_id']\n", + "2024-12-14 17:15:29.651 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, careunit, eventtype, hadm_id, intime, subject_id\n", + "2024-12-14 17:15:29.651 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.651 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:29.655 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005833\n", + "2024-12-14 17:15:29.655 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T17:15:29.649856.json\n", + "2024-12-14 17:15:29.658 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents.\n", + "2024-12-14 17:15:29.658 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv to determine row count.\n", + "2024-12-14 17:15:29.660 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.641 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", - "2024-12-14 16:07:05.642 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, icd_code, icd_version, subject_id\n", - "2024-12-14 16:07:05.642 | INFO | 
MEDS_transforms.extract.shard_events:main:405 - Read 722 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv.\n", - "2024-12-14 16:07:05.642 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.642 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv row-chunk [0-722) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet.\n", - "2024-12-14 16:07:05.645 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.644982. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.645 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv\n", - "2024-12-14 16:07:05.645 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.660 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'priority', 'subject_id', 'value', 'valuenum', 'valueuom']\n", + "2024-12-14 17:15:29.764 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, priority, subject_id, value, valuenum, valueuom\n", + "2024-12-14 17:15:29.771 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 107727 rows from 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv.\n", + "2024-12-14 17:15:29.771 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:29.771 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv row-chunk [0-107727) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet.\n", + "2024-12-14 17:15:29.774 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.773914. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:29.774 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv\n", + "2024-12-14 17:15:29.774 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.645 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", - "2024-12-14 16:07:05.646 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, icd_code, icd_version, subject_id\n", - "2024-12-14 16:07:05.646 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.646 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:05.648 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003102\n", - "2024-12-14 16:07:05.648 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T16:07:05.644982.json\n", - "2024-12-14 16:07:05.650 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes.\n", - "2024-12-14 16:07:05.650 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet to determine row count.\n", - "2024-12-14 16:07:05.652 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 16:07:05.652 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet as Parquet with kwargs:\n", - ".\n", - "2024-12-14 16:07:05.652 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['description', 'drg_code', 'drg_mortality', 'drg_severity', 'drg_type', 'hadm_discharge_time', 'hadm_id', 'subject_id']\n", - "2024-12-14 16:07:05.653 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, description, 
drg_code, drg_mortality, drg_severity, drg_type, hadm_discharge_time, hadm_id, subject_id\n", - "2024-12-14 16:07:05.653 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 454 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet.\n", - "2024-12-14 16:07:05.653 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.653 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet row-chunk [0-454) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet.\n", - "2024-12-14 16:07:05.655 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.655772. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.656 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet\n", - "2024-12-14 16:07:05.656 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 16:07:05.656 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet as Parquet with kwargs:\n", - ".\n", - "2024-12-14 16:07:05.656 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['description', 'drg_code', 'drg_mortality', 'drg_severity', 'drg_type', 'hadm_discharge_time', 'hadm_id', 'subject_id']\n", - "2024-12-14 16:07:05.656 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, description, drg_code, drg_mortality, drg_severity, drg_type, hadm_discharge_time, hadm_id, subject_id\n", - "2024-12-14 16:07:05.656 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.656 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:05.658 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.002542\n", - "2024-12-14 16:07:05.658 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T16:07:05.655772.json\n", - "2024-12-14 16:07:05.660 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy.\n", - "2024-12-14 16:07:05.661 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv to determine row count.\n", - "2024-12-14 16:07:05.663 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.774 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'priority', 'subject_id', 'value', 'valuenum', 'valueuom']\n", + "2024-12-14 17:15:29.877 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, priority, subject_id, value, valuenum, valueuom\n", + "2024-12-14 17:15:29.877 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:29.878 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:29.928 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.154632\n", + "2024-12-14 17:15:29.928 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T17:15:29.773914.json\n", + "2024-12-14 17:15:29.931 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents.\n", + "2024-12-14 17:15:29.931 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv to determine row count.\n", + "2024-12-14 17:15:29.933 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.663 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['doses_per_24_hrs', 'frequency', 'medication', 'poe_id', 'route', 'starttime', 'stoptime', 'subject_id']\n", - "2024-12-14 16:07:05.697 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, doses_per_24_hrs, frequency, medication, poe_id, route, starttime, stoptime, subject_id\n", - "2024-12-14 16:07:05.699 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 15306 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv.\n", - "2024-12-14 16:07:05.699 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.699 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv row-chunk [0-15306) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet.\n", - "2024-12-14 16:07:05.702 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.702392. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.702 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv\n", - "2024-12-14 16:07:05.702 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv as CSV with kwargs:\n", + "2024-12-14 17:15:29.933 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valuenum', 'valueuom']\n", + "2024-12-14 17:15:30.414 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valuenum, valueuom\n", + "2024-12-14 17:15:30.444 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 668862 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv.\n", + "2024-12-14 17:15:30.444 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv into 1 row-chunks of size 
200000000.\n", + "2024-12-14 17:15:30.444 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv row-chunk [0-668862) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet.\n", + "2024-12-14 17:15:30.448 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:30.447796. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:30.448 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv\n", + "2024-12-14 17:15:30.448 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.702 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['doses_per_24_hrs', 'frequency', 'medication', 'poe_id', 'route', 'starttime', 'stoptime', 'subject_id']\n", - "2024-12-14 16:07:05.737 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, doses_per_24_hrs, frequency, medication, poe_id, route, starttime, stoptime, subject_id\n", - "2024-12-14 16:07:05.737 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.737 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:05.765 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.063267\n", - "2024-12-14 16:07:05.765 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T16:07:05.702392.json\n", - "2024-12-14 16:07:05.768 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd.\n", - "2024-12-14 16:07:05.768 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet to determine row count.\n", - "2024-12-14 16:07:05.770 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 16:07:05.770 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet as Parquet with kwargs:\n", - ".\n", - "2024-12-14 16:07:05.770 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['hadm_discharge_time', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", - "2024-12-14 16:07:05.770 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, hadm_discharge_time, hadm_id, icd_code, icd_version, subject_id\n", - "2024-12-14 16:07:05.771 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 4506 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet.\n", - "2024-12-14 16:07:05.771 | INFO | 
MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.771 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet row-chunk [0-4506) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet.\n", - "2024-12-14 16:07:05.773 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.773633. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.773 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet\n", - "2024-12-14 16:07:05.773 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 16:07:05.774 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet as Parquet with kwargs:\n", - ".\n", - "2024-12-14 16:07:05.774 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['hadm_discharge_time', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", - "2024-12-14 16:07:05.774 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, hadm_discharge_time, hadm_id, icd_code, icd_version, subject_id\n", - "2024-12-14 16:07:05.774 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.774 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:05.776 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003029\n", - "2024-12-14 16:07:05.776 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T16:07:05.773633.json\n", - "2024-12-14 16:07:05.779 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents.\n", - "2024-12-14 16:07:05.779 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv to determine row count.\n", - "2024-12-14 16:07:05.781 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:30.448 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valuenum', 'valueuom']\n", + "2024-12-14 17:15:30.923 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valuenum, valueuom\n", + "2024-12-14 17:15:30.923 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read 
dataset\n", + "2024-12-14 17:15:30.923 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:31.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.664018\n", + "2024-12-14 17:15:31.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T17:15:30.447796.json\n", + "2024-12-14 17:15:31.114 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents.\n", + "2024-12-14 17:15:31.115 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv to determine row count.\n", + "2024-12-14 17:15:31.117 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.781 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['amount', 'amountuom', 'endtime', 'hadm_id', 'itemid', 'linkorderid', 'ordercategorydescription', 'orderid', 'patientweight', 'rate', 'rateuom', 'starttime', 'statusdescription', 'stay_id', 'subject_id']\n", - "2024-12-14 16:07:05.842 | DEBUG | 
MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, amount, amountuom, endtime, hadm_id, itemid, linkorderid, ordercategorydescription, orderid, patientweight, rate, rateuom, starttime, statusdescription, stay_id, subject_id\n", - "2024-12-14 16:07:05.845 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 20404 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv.\n", - "2024-12-14 16:07:05.846 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.846 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv row-chunk [0-20404) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet.\n", - "2024-12-14 16:07:05.848 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.848754. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.849 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv\n", - "2024-12-14 16:07:05.849 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.117 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'short_description', 'subject_id']\n", + "2024-12-14 17:15:31.117 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, short_description, subject_id\n", + "2024-12-14 17:15:31.117 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 61 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv.\n", + "2024-12-14 17:15:31.118 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:31.118 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv row-chunk [0-61) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet.\n", + "2024-12-14 17:15:31.120 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.120550. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:31.120 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv\n", + "2024-12-14 17:15:31.120 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.849 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['amount', 'amountuom', 'endtime', 'hadm_id', 'itemid', 'linkorderid', 'ordercategorydescription', 'orderid', 'patientweight', 'rate', 'rateuom', 'starttime', 'statusdescription', 'stay_id', 'subject_id']\n", - "2024-12-14 16:07:05.910 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, amount, amountuom, endtime, hadm_id, itemid, linkorderid, ordercategorydescription, orderid, patientweight, rate, rateuom, starttime, statusdescription, stay_id, subject_id\n", - "2024-12-14 16:07:05.910 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.910 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:05.940 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.092049\n", - "2024-12-14 16:07:05.940 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T16:07:05.848754.json\n", - "2024-12-14 16:07:05.944 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays.\n", - "2024-12-14 16:07:05.944 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv to determine row count.\n", - "2024-12-14 16:07:05.946 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.121 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'short_description', 'subject_id']\n", + "2024-12-14 17:15:31.121 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, short_description, subject_id\n", + "2024-12-14 17:15:31.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:31.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:31.122 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.002100\n", + "2024-12-14 17:15:31.122 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T17:15:31.120550.json\n", + "2024-12-14 17:15:31.124 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions.\n", + "2024-12-14 17:15:31.125 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv to determine row count.\n", + "2024-12-14 17:15:31.127 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.947 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['first_careunit', 'hadm_id', 'intime', 'last_careunit', 'outtime', 'stay_id', 'subject_id']\n", - "2024-12-14 16:07:05.947 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, first_careunit, hadm_id, intime, last_careunit, outtime, stay_id, subject_id\n", - "2024-12-14 16:07:05.947 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 140 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv.\n", - "2024-12-14 16:07:05.947 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv into 1 
row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.947 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv row-chunk [0-140) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet.\n", - "2024-12-14 16:07:05.950 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.950272. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.950 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv\n", - "2024-12-14 16:07:05.950 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.127 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admission_location', 'admission_type', 'admittime', 'discharge_location', 'dischtime', 'edouttime', 'edregtime', 'hadm_id', 'insurance', 'language', 'marital_status', 'race', 'subject_id']\n", + "2024-12-14 17:15:31.127 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admission_location, admission_type, admittime, discharge_location, dischtime, edouttime, edregtime, hadm_id, insurance, language, marital_status, race, subject_id\n", + "2024-12-14 17:15:31.128 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 275 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv.\n", + "2024-12-14 17:15:31.128 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:31.128 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv row-chunk [0-275) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet.\n", + "2024-12-14 17:15:31.130 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.130847. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:31.131 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv\n", + "2024-12-14 17:15:31.131 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.950 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['first_careunit', 'hadm_id', 'intime', 'last_careunit', 'outtime', 'stay_id', 'subject_id']\n", - "2024-12-14 16:07:05.951 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, first_careunit, hadm_id, intime, last_careunit, outtime, stay_id, subject_id\n", - "2024-12-14 16:07:05.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:05.953 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.002913\n", - "2024-12-14 16:07:05.953 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T16:07:05.950272.json\n", - "2024-12-14 16:07:05.955 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents.\n", - "2024-12-14 16:07:05.955 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv to determine row count.\n", - "2024-12-14 16:07:05.957 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.131 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admission_location', 'admission_type', 'admittime', 'discharge_location', 'dischtime', 'edouttime', 'edregtime', 'hadm_id', 'insurance', 'language', 'marital_status', 'race', 'subject_id']\n", + "2024-12-14 17:15:31.131 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admission_location, admission_type, admittime, discharge_location, dischtime, edouttime, edregtime, hadm_id, insurance, language, marital_status, race, subject_id\n", + "2024-12-14 17:15:31.131 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - 
Read dataset\n", + "2024-12-14 17:15:31.131 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:31.135 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.004670\n", + "2024-12-14 17:15:31.135 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/.[0-275).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T17:15:31.130847.json\n", + "2024-12-14 17:15:31.137 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents.\n", + "2024-12-14 17:15:31.138 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv to determine row count.\n", + "2024-12-14 17:15:31.141 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.958 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'short_description', 'subject_id']\n", - "2024-12-14 16:07:05.958 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, short_description, subject_id\n", - 
"2024-12-14 16:07:05.958 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 61 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv.\n", - "2024-12-14 16:07:05.958 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.958 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv row-chunk [0-61) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet.\n", - "2024-12-14 16:07:05.960 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.960658. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.960 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv\n", - "2024-12-14 16:07:05.961 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.141 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['amount', 'amountuom', 'endtime', 'hadm_id', 'itemid', 'linkorderid', 'ordercategorydescription', 'orderid', 'patientweight', 'rate', 'rateuom', 'starttime', 'statusdescription', 'stay_id', 'subject_id']\n", + "2024-12-14 17:15:31.177 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, amount, amountuom, endtime, hadm_id, itemid, linkorderid, ordercategorydescription, orderid, 
patientweight, rate, rateuom, starttime, statusdescription, stay_id, subject_id\n", + "2024-12-14 17:15:31.180 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 20404 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv.\n", + "2024-12-14 17:15:31.180 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:31.180 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv row-chunk [0-20404) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet.\n", + "2024-12-14 17:15:31.183 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.183076. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:31.183 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv\n", + "2024-12-14 17:15:31.183 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.961 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'short_description', 'subject_id']\n", - "2024-12-14 16:07:05.961 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, short_description, subject_id\n", - "2024-12-14 16:07:05.961 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.961 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:05.962 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.001616\n", - "2024-12-14 16:07:05.962 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T16:07:05.960658.json\n", - "2024-12-14 16:07:05.964 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr.\n", - "2024-12-14 16:07:05.965 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv to determine row count.\n", - "2024-12-14 16:07:05.967 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.183 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['amount', 'amountuom', 'endtime', 'hadm_id', 'itemid', 'linkorderid', 'ordercategorydescription', 'orderid', 'patientweight', 'rate', 'rateuom', 'starttime', 'statusdescription', 'stay_id', 'subject_id']\n", + "2024-12-14 17:15:31.220 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, amount, amountuom, endtime, hadm_id, itemid, linkorderid, ordercategorydescription, orderid, patientweight, rate, rateuom, starttime, statusdescription, stay_id, subject_id\n", + "2024-12-14 17:15:31.220 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:31.220 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:31.240 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.057787\n", + "2024-12-14 17:15:31.240 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T17:15:31.183076.json\n", + "2024-12-14 17:15:31.244 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd.\n", + "2024-12-14 17:15:31.244 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet to determine row count.\n", + "2024-12-14 17:15:31.246 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:15:31.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:15:31.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['hadm_discharge_time', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", + "2024-12-14 17:15:31.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, hadm_discharge_time, hadm_id, icd_code, icd_version, subject_id\n", + "2024-12-14 17:15:31.247 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 4506 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet.\n", + "2024-12-14 17:15:31.247 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 
17:15:31.247 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet row-chunk [0-4506) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet.\n", + "2024-12-14 17:15:31.249 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.249669. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:31.249 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet\n", + "2024-12-14 17:15:31.250 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:15:31.250 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:15:31.250 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['hadm_discharge_time', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", + "2024-12-14 17:15:31.250 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, hadm_discharge_time, hadm_id, icd_code, icd_version, subject_id\n", + "2024-12-14 17:15:31.250 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:31.250 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:31.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 
0:00:00.003216\n", + "2024-12-14 17:15:31.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T17:15:31.249669.json\n", + "2024-12-14 17:15:31.255 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy.\n", + "2024-12-14 17:15:31.255 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv to determine row count.\n", + "2024-12-14 17:15:31.257 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.967 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'result_name', 'result_value', 'subject_id']\n", - "2024-12-14 16:07:05.968 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, result_name, result_value, subject_id\n", - "2024-12-14 16:07:05.969 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 2964 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv.\n", - "2024-12-14 16:07:05.969 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:05.969 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv row-chunk [0-2964) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet.\n", - "2024-12-14 16:07:05.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:05.971901. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:05.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv\n", - "2024-12-14 16:07:05.972 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.257 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['doses_per_24_hrs', 'frequency', 'medication', 'poe_id', 'route', 'starttime', 'stoptime', 'subject_id']\n", + "2024-12-14 17:15:31.278 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, doses_per_24_hrs, frequency, medication, poe_id, route, starttime, stoptime, subject_id\n", + "2024-12-14 17:15:31.280 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 15306 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv.\n", + "2024-12-14 17:15:31.280 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv into 1 row-chunks of size 200000000.\n", + 
"2024-12-14 17:15:31.280 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv row-chunk [0-15306) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet.\n", + "2024-12-14 17:15:31.283 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.283277. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:31.283 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv\n", + "2024-12-14 17:15:31.283 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.972 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'result_name', 'result_value', 'subject_id']\n", - "2024-12-14 16:07:05.974 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, result_name, result_value, subject_id\n", - "2024-12-14 16:07:05.974 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:05.974 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:05.978 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006211\n", - "2024-12-14 16:07:05.978 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T16:07:05.971901.json\n", - "2024-12-14 16:07:05.980 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents.\n", - "2024-12-14 16:07:05.980 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv to determine row count.\n", - "2024-12-14 16:07:05.982 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.283 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['doses_per_24_hrs', 'frequency', 'medication', 'poe_id', 'route', 'starttime', 'stoptime', 'subject_id']\n", + "2024-12-14 17:15:31.304 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, doses_per_24_hrs, frequency, medication, poe_id, route, starttime, stoptime, subject_id\n", + "2024-12-14 17:15:31.304 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:31.305 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:31.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.054084\n", 
+ "2024-12-14 17:15:31.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T17:15:31.283277.json\n", + "2024-12-14 17:15:31.340 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr.\n", + "2024-12-14 17:15:31.340 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv to determine row count.\n", + "2024-12-14 17:15:31.342 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:05.983 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'priority', 'subject_id', 'value', 'valuenum', 'valueuom']\n", - "2024-12-14 16:07:06.155 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, priority, subject_id, value, valuenum, valueuom\n", - "2024-12-14 16:07:06.161 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 107727 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv.\n", - "2024-12-14 16:07:06.161 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 16:07:06.161 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv row-chunk [0-107727) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet.\n", - "2024-12-14 16:07:06.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:06.164674. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:06.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv\n", - "2024-12-14 16:07:06.165 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv as CSV with kwargs:\n", + "2024-12-14 17:15:31.342 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'result_name', 'result_value', 'subject_id']\n", + "2024-12-14 17:15:31.343 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, result_name, result_value, subject_id\n", + "2024-12-14 17:15:31.344 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 2964 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv.\n", + "2024-12-14 17:15:31.344 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:15:31.344 | INFO | 
MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv row-chunk [0-2964) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet.\n", + "2024-12-14 17:15:31.346 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.346587. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:31.346 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv\n", + "2024-12-14 17:15:31.346 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 16:07:06.165 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'priority', 'subject_id', 'value', 'valuenum', 'valueuom']\n", - "2024-12-14 16:07:06.337 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, priority, subject_id, value, valuenum, valueuom\n", - "2024-12-14 16:07:06.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:06.338 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:06.384 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.220152\n", - "2024-12-14 16:07:06.384 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T16:07:06.164674.json\n", - "2024-12-14 16:07:06.385 | INFO | MEDS_transforms.extract.shard_events:main:430 - Sub-sharding completed in 0:00:02.594945\n", + "2024-12-14 17:15:31.347 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'result_name', 'result_value', 'subject_id']\n", + "2024-12-14 17:15:31.348 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, result_name, result_value, subject_id\n", + "2024-12-14 17:15:31.348 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:31.348 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:31.352 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005502\n", + "2024-12-14 17:15:31.352 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T17:15:31.346587.json\n", + "2024-12-14 17:15:31.352 | INFO | MEDS_transforms.extract.shard_events:main:430 - Sub-sharding completed in 0:00:01.985358\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:06.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: 
split_and_shard_subjects\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:06.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-split_and_shard_subjects --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=split_and_shard_subjects\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:07.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:15:31.406\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: split_and_shard_subjects\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:31.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-split_and_shard_subjects --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=split_and_shard_subjects\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:32.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:07.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:07:07.037 | INFO | MEDS_transforms.utils:stage_init:73 - Running split_and_shard_subjects with the following configuration:\n", + "\u001b[32m2024-12-14 17:15:32.147\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:15:31.960 | INFO | MEDS_transforms.utils:stage_init:73 - Running split_and_shard_subjects with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -940,9 +936,9 @@ " n_subjects_per_shard: 1000\n", " external_splits_json_fp: null\n", " split_fracs:\n", - " train: 0.8\n", - " tuning: 0.1\n", - " held_out: 0.1\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", " convert_to_sharded_events:\n", " do_dedup_text_and_numeric: true\n", " merge_to_MEDS_cohort:\n", @@ -958,10 +954,6 @@ " do_retype: true\n", " finalize_MEDS_data:\n", " do_retype: true\n", - " split_fracs:\n", - " train: 0.5\n", - " tuning: 0.25\n", - " held_out: 0.25\n", "worker: 0\n", "polling_time: 300\n", "stage: split_and_shard_subjects\n", @@ -984,15 +976,15 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:07:07.051 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:15:31.974 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "is_metadata: true\n", "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//metadata\n", "n_subjects_per_shard: 1000\n", "external_splits_json_fp: null\n", "split_fracs:\n", - " train: 0.8\n", - " tuning: 0.1\n", - " held_out: 0.1\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/metadata\n", "reducer_output_dir: 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", @@ -1002,8 +994,8 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/metadata\n", - "2024-12-14 16:07:07.051 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:215 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml (needed for subject ID columns)\n", - "2024-12-14 16:07:07.082 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:219 - Event conversion config:\n", + "2024-12-14 17:15:31.975 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:215 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml (needed for subject ID columns)\n", + "2024-12-14 17:15:32.007 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:219 - Event conversion config:\n", "subject_id_col: subject_id\n", "hosp/admissions:\n", " ed_registration:\n", @@ -1326,53 +1318,53 @@ " valueuom: unitname\n", " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", "\n", - "2024-12-14 16:07:07.083 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/admissions files:\n", + "2024-12-14 17:15:32.008 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/admissions files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:07.084 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from 
hosp/diagnoses_icd files:\n", + "2024-12-14 17:15:32.009 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/diagnoses_icd files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:07.084 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/drgcodes files:\n", + "2024-12-14 17:15:32.009 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/drgcodes files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:07.084 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/emar files:\n", + "2024-12-14 17:15:32.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/emar files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:07.085 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/hcpcsevents files:\n", + "2024-12-14 17:15:32.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/hcpcsevents files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:07.085 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/labevents files:\n", + "2024-12-14 17:15:32.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/labevents files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:07.085 | 
INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/omr files:\n", + "2024-12-14 17:15:32.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/omr files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:07.085 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/patients files:\n", + "2024-12-14 17:15:32.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/patients files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:07.085 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/pharmacy files:\n", + "2024-12-14 17:15:32.011 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/pharmacy files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:07.086 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/procedures_icd files:\n", + "2024-12-14 17:15:32.011 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/procedures_icd files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:07.086 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/transfers files:\n", + "2024-12-14 17:15:32.011 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/transfers files:\n", " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:07.086 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/icustays files:\n", + "2024-12-14 17:15:32.011 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/icustays files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:07.086 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/chartevents files:\n", + "2024-12-14 17:15:32.011 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/chartevents files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:07.086 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/procedureevents files:\n", + "2024-12-14 17:15:32.012 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/procedureevents files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:07.087 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/inputevents files:\n", + "2024-12-14 17:15:32.012 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/inputevents files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:07.087 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/outputevents files:\n", + "2024-12-14 17:15:32.012 | INFO | 
MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/outputevents files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:07.087 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:239 - Joining all subject IDs from 16 dataframes\n", - "2024-12-14 16:07:07.125 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:247 - Found 100 unique subject IDs of type int64\n", - "2024-12-14 16:07:07.128 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:262 - Sharding and splitting subjects\n", - "2024-12-14 16:07:07.166 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split train/0 has 80 subjects.\n", - "2024-12-14 16:07:07.166 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split tuning/0 has 10 subjects.\n", - "2024-12-14 16:07:07.166 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split held_out/0 has 10 subjects.\n", - "2024-12-14 16:07:07.166 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:273 - Writing sharded subjects to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/.shards.json\n", - "2024-12-14 16:07:07.167 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:276 - Done writing sharded subjects\n", + "2024-12-14 17:15:32.012 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:239 - Joining all subject IDs from 16 dataframes\n", + "2024-12-14 17:15:32.052 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:247 - Found 100 unique subject IDs of type int64\n", + "2024-12-14 17:15:32.056 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:262 - Sharding and splitting subjects\n", + "2024-12-14 17:15:32.101 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split train/0 has 50 subjects.\n", + 
"2024-12-14 17:15:32.101 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split tuning/0 has 25 subjects.\n", + "2024-12-14 17:15:32.101 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split held_out/0 has 25 subjects.\n", + "2024-12-14 17:15:32.102 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:273 - Writing sharded subjects to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/.shards.json\n", + "2024-12-14 17:15:32.102 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:276 - Done writing sharded subjects\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:07.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: convert_to_sharded_events\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:07.221\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-convert_to_sharded_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=convert_to_sharded_events\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:08.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:15:32.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: convert_to_sharded_events\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:32.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: 
MEDS_extract-convert_to_sharded_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=convert_to_sharded_events\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:33.739\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:08.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:07:07.767 | INFO | MEDS_transforms.utils:stage_init:73 - Running convert_to_sharded_events with the following configuration:\n", + "\u001b[32m2024-12-14 17:15:33.739\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:15:32.724 | INFO | MEDS_transforms.utils:stage_init:73 - Running convert_to_sharded_events with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", @@ -1401,9 +1393,9 @@ " n_subjects_per_shard: 1000\n", " external_splits_json_fp: null\n", " split_fracs:\n", - " train: 0.8\n", - " tuning: 0.1\n", - " held_out: 0.1\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", " convert_to_sharded_events:\n", " do_dedup_text_and_numeric: true\n", " merge_to_MEDS_cohort:\n", @@ -1419,10 +1411,6 @@ " do_retype: true\n", " finalize_MEDS_data:\n", " do_retype: true\n", - " split_fracs:\n", - " train: 0.5\n", - " tuning: 0.25\n", - " held_out: 0.25\n", "worker: 0\n", "polling_time: 300\n", "stage: convert_to_sharded_events\n", @@ -1445,7 +1433,7 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:07:07.780 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:15:32.738 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "do_dedup_text_and_numeric: true\n", "is_metadata: false\n", "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", @@ -1457,9 +1445,9 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events\n", " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", - "2024-12-14 16:07:07.781 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:769 - Starting event conversion.\n", - "2024-12-14 16:07:07.781 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:771 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml\n", - "2024-12-14 16:07:07.811 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:773 - Event conversion 
config:\n", + "2024-12-14 17:15:32.739 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:769 - Starting event conversion.\n", + "2024-12-14 17:15:32.739 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:771 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml\n", + "2024-12-14 17:15:32.771 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:773 - Event conversion config:\n", "subject_id_col: subject_id\n", "hosp/admissions:\n", " ed_registration:\n", @@ -1782,19 +1770,36 @@ " valueuom: unitname\n", " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", "\n", - "2024-12-14 16:07:07.820 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:07.821 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.821527. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.822 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:07.823 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.780 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:32.781 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.781478. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.783 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:32.783 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:802: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " typed_subjects = pl.Series(subjects, dtype=df.schema[input_subject_id_column])\n", - "2024-12-14 16:07:07.829 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:07.834 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", + "2024-12-14 17:15:32.790 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:32.794 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:468: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " if col not in df.schema:\n", - "2024-12-14 16:07:07.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", - "2024-12-14 16:07:07.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", - "2024-12-14 16:07:07.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.796 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 17:15:32.796 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 17:15:32.796 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 17:15:32.796 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth in possible formats %Y\n", + "2024-12-14 17:15:32.797 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.797 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", + "2024-12-14 17:15:32.797 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:32.797 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", + 
"2024-12-14 17:15:32.797 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:32.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.044811\n", + "2024-12-14 17:15:32.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T17:15:32.781478.json\n", + "2024-12-14 17:15:32.827 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:32.827 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.827805. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.828 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:32.828 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.828 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:32.832 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", + "2024-12-14 17:15:32.833 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 17:15:32.833 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 17:15:32.833 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:513: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " if v not in df.schema:\n", "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:517: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", @@ -1803,721 +1808,704 @@ " is_str = df.schema[v] == pl.Utf8\n", "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:519: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " is_cat = isinstance(df.schema[v], pl.Categorical)\n", - "2024-12-14 16:07:07.836 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.836 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:07.859 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.037877\n", - "2024-12-14 16:07:07.859 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T16:07:07.821527.json\n", - "2024-12-14 16:07:07.859 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:07.860 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.860108. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.860 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:07.860 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.860 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:07.864 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", - "2024-12-14 16:07:07.864 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", - "2024-12-14 16:07:07.864 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:07.865 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source column 'col(result_value)' for event column text_value is always interpreted as a column name. 
Removing col() function call and setting source column to result_value.\n", + "2024-12-14 17:15:32.833 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.833 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:32.838 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010302\n", + "2024-12-14 17:15:32.838 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T17:15:32.827805.json\n", + "2024-12-14 17:15:32.838 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:32.839 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.838982. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.839 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:32.839 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:32.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", + "2024-12-14 17:15:32.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", + "2024-12-14 17:15:32.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 17:15:32.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.843 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:32.845 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006657\n", + "2024-12-14 17:15:32.845 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T17:15:32.838982.json\n", + "2024-12-14 17:15:32.846 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:32.846 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.846632. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.846 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:32.846 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.847 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:32.850 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", + "2024-12-14 17:15:32.851 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", + "2024-12-14 17:15:32.851 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.851 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.851 | 
INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", + "2024-12-14 17:15:32.851 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", + "2024-12-14 17:15:32.851 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.852 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.852 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:32.855 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008629\n", + "2024-12-14 17:15:32.855 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T17:15:32.846632.json\n", + "2024-12-14 17:15:32.855 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:32.856 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered 
lock at 2024-12-14 17:15:32.856164. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.856 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:32.856 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.856 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:32.860 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", + "2024-12-14 17:15:32.860 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", + "2024-12-14 17:15:32.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", + "2024-12-14 17:15:32.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", + "2024-12-14 17:15:32.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.861 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:32.863 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007601\n", + "2024-12-14 17:15:32.863 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T17:15:32.856164.json\n", + "2024-12-14 17:15:32.864 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:32.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.865354. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:32.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:32.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", + "2024-12-14 17:15:32.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:32.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d 
%H:%M:%S\n", + "2024-12-14 17:15:32.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", + "2024-12-14 17:15:32.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:32.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.873 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.873 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", + "2024-12-14 17:15:32.873 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.874 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.874 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:32.888 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.023607\n", + "2024-12-14 17:15:32.889 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T17:15:32.865354.json\n", + "2024-12-14 17:15:32.889 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:32.890 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.889940. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.890 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:32.890 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.890 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:32.894 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 17:15:32.895 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 17:15:32.895 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 17:15:32.895 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.895 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.895 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:32.897 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007572\n", + "2024-12-14 17:15:32.897 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T17:15:32.889940.json\n", + "2024-12-14 17:15:32.898 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:32.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.898624. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:32.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.899 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:32.903 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 17:15:32.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:32.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:32.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:529: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " case \"text_value\" if not df.schema[v] == pl.Utf8:\n", - "2024-12-14 16:07:07.865 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", - "2024-12-14 16:07:07.865 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:07.868 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008277\n", - "2024-12-14 16:07:07.868 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T16:07:07.860108.json\n", - "2024-12-14 16:07:07.869 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:07.870 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.869972. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.870 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:07.870 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.870 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:07.874 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", - "2024-12-14 16:07:07.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:07.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", - "2024-12-14 16:07:07.876 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:07.876 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.876 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.876 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:07.883 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013825\n", - "2024-12-14 16:07:07.883 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T16:07:07.869972.json\n", - "2024-12-14 16:07:07.884 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:07.884 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.884587. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.884 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:07.884 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.885 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:07.888 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", - "2024-12-14 16:07:07.889 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 16:07:07.889 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 16:07:07.889 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.889 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.889 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:07.891 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006845\n", - "2024-12-14 16:07:07.891 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T16:07:07.884587.json\n", - "2024-12-14 16:07:07.891 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:07.892 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.892220. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.892 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:07.892 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.892 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:07.896 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", - "2024-12-14 16:07:07.896 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:07.896 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", - "2024-12-14 16:07:07.896 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column 
charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.897 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.897 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:07.902 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009786\n", - "2024-12-14 16:07:07.902 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T16:07:07.892220.json\n", - "2024-12-14 16:07:07.902 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:07.903 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.903054. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.903 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:07.903 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.903 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:07.907 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", - "2024-12-14 16:07:07.907 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.907 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.907 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", - "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", - "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
admission_type\n", - "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_location\n", - "2024-12-14 16:07:07.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", - "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", - "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.909 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:07.918 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.015170\n", - "2024-12-14 16:07:07.918 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T16:07:07.903054.json\n", - "2024-12-14 16:07:07.918 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:07.919 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.919077. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.919 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:07.919 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.919 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", - "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", - "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", - "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", - "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column 
year_of_birth in possible formats %Y\n", - "2024-12-14 16:07:07.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.924 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", - "2024-12-14 16:07:07.924 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:07.924 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:07.924 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:07.928 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009003\n", - "2024-12-14 16:07:07.928 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T16:07:07.919077.json\n", - "2024-12-14 16:07:07.928 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:07.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.928924. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:07.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.929 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:07.933 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", - "2024-12-14 16:07:07.934 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:07.934 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:07.934 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.935 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.935 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:07.968 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.039739\n", - "2024-12-14 16:07:07.968 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T16:07:07.928924.json\n", - "2024-12-14 16:07:07.969 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:07.970 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.969832. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.970 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:07.970 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.970 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:07.974 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", - "2024-12-14 16:07:07.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:07.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:07.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:07.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.975 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:07.978 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008660\n", - "2024-12-14 16:07:07.978 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T16:07:07.969832.json\n", - "2024-12-14 16:07:07.979 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:07.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.979455. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:07.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.980 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:07.983 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", - "2024-12-14 16:07:07.984 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:07.984 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:07.984 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:07.984 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", - "2024-12-14 16:07:07.985 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:07.985 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:07.985 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:07.985 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:07.991 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011555\n", - "2024-12-14 16:07:07.991 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T16:07:07.979455.json\n", - "2024-12-14 16:07:07.991 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:07.992 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.991934. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.992 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:07.992 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.992 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:07.996 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", - "2024-12-14 16:07:07.996 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", - "2024-12-14 16:07:07.996 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:07.996 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:07.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:07.998 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006670\n", - "2024-12-14 16:07:07.998 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T16:07:07.991934.json\n", - "2024-12-14 16:07:07.999 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:07.999 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:07.999367. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:07.999 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:07.999 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:07.999 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.003 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", - "2024-12-14 16:07:08.003 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", - "2024-12-14 16:07:08.003 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", - "2024-12-14 16:07:08.004 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", - "2024-12-14 16:07:08.004 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.004 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.004 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.006 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006814\n", - "2024-12-14 16:07:08.006 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T16:07:07.999367.json\n", - "2024-12-14 16:07:08.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.007 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.007341. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.007 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.007 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", - "2024-12-14 16:07:08.012 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 16:07:08.012 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 16:07:08.012 | INFO 
| MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:08.012 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.012 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.014 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006829\n", - "2024-12-14 16:07:08.014 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T16:07:08.007341.json\n", - "2024-12-14 16:07:08.014 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.015 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.015034. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.015 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.015 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.015 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.019 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", - "2024-12-14 16:07:08.019 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:08.019 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.020 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.020 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.020 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.026 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011136\n", - "2024-12-14 16:07:08.026 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T16:07:08.015034.json\n", - "2024-12-14 16:07:08.027 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.027 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.027619. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.027 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.027 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.028 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.032 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", - "2024-12-14 16:07:08.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.033 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", - "2024-12-14 16:07:08.034 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.034 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.035 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.035 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", - "2024-12-14 16:07:08.035 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.035 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.035 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016992\n", - "2024-12-14 16:07:08.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T16:07:08.027619.json\n", - "2024-12-14 16:07:08.045 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.045586. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.046 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.050 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", - "2024-12-14 16:07:08.050 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", - "2024-12-14 16:07:08.050 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.051 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", - "2024-12-14 16:07:08.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", - "2024-12-14 16:07:08.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.051 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.054 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008874\n", - "2024-12-14 16:07:08.054 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T16:07:08.045586.json\n", - "2024-12-14 16:07:08.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:08.055 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.055324. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.055 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:08.055 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.055 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:08.059 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", - "2024-12-14 16:07:08.059 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", - "2024-12-14 16:07:08.059 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", - "2024-12-14 16:07:08.059 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.060 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.060 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:08.061 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006326\n", - "2024-12-14 16:07:08.061 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T16:07:08.055324.json\n", - "2024-12-14 16:07:08.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:08.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.062419. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:08.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:08.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", - "2024-12-14 16:07:08.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", - "2024-12-14 16:07:08.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:08.066 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source column 'col(result_value)' for event column text_value is always interpreted as a column name. 
Removing col() function call and setting source column to result_value.\n", - "2024-12-14 16:07:08.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", - "2024-12-14 16:07:08.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.067 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:08.068 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006074\n", - "2024-12-14 16:07:08.068 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T16:07:08.062419.json\n", - "2024-12-14 16:07:08.069 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:08.070 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.069888. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.070 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:08.070 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.070 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:08.074 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", - "2024-12-14 16:07:08.075 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.075 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.075 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.075 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", - "2024-12-14 16:07:08.076 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.076 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.076 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.076 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:08.079 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009659\n", - "2024-12-14 16:07:08.079 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T16:07:08.069888.json\n", - "2024-12-14 16:07:08.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:08.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.080456. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:08.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:08.084 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", - "2024-12-14 16:07:08.084 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 16:07:08.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 16:07:08.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.085 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:08.086 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006417\n", - "2024-12-14 16:07:08.086 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T16:07:08.080456.json\n", - "2024-12-14 16:07:08.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:08.087 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.087668. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.087 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:08.087 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:08.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", - "2024-12-14 16:07:08.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:08.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", - "2024-12-14 16:07:08.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time 
column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.092 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:08.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009201\n", - "2024-12-14 16:07:08.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T16:07:08.087668.json\n", - "2024-12-14 16:07:08.097 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:08.098 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.098026. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.098 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:08.098 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.098 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", - "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", - "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", - "2024-12-14 16:07:08.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
admission_type\n", - "2024-12-14 16:07:08.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_location\n", - "2024-12-14 16:07:08.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", - "2024-12-14 16:07:08.104 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", - "2024-12-14 16:07:08.104 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.104 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.104 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:08.108 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010742\n", - "2024-12-14 16:07:08.108 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T16:07:08.098026.json\n", - "2024-12-14 16:07:08.109 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:08.109 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.109557. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.109 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:08.109 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:08.113 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column 
year_of_birth in possible formats %Y\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:08.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:08.117 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008429\n", - "2024-12-14 16:07:08.118 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T16:07:08.109557.json\n", - "2024-12-14 16:07:08.118 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:08.119 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.118982. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.119 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:08.119 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.119 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:08.123 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", - "2024-12-14 16:07:08.124 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:08.124 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.124 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.124 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:08.143 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024349\n", - "2024-12-14 16:07:08.143 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T16:07:08.118982.json\n", - "2024-12-14 16:07:08.144 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:08.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.144266. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:08.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.144 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:08.148 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", - "2024-12-14 16:07:08.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:08.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.149 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:08.152 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008152\n", - "2024-12-14 16:07:08.152 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T16:07:08.144266.json\n", - "2024-12-14 16:07:08.153 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:08.153 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.153615. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.153 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:08.153 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.154 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:08.158 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", - "2024-12-14 16:07:08.158 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:08.158 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:08.158 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:08.158 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", - "2024-12-14 16:07:08.159 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:08.159 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:08.159 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:08.159 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:08.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010973\n", - "2024-12-14 16:07:08.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T16:07:08.153615.json\n", - "2024-12-14 16:07:08.165 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:08.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.165366. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:08.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.165 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:08.169 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", - "2024-12-14 16:07:08.169 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", - "2024-12-14 16:07:08.169 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:08.170 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.170 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:08.171 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005897\n", - "2024-12-14 16:07:08.171 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T16:07:08.165366.json\n", - "2024-12-14 16:07:08.171 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.172125. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.172 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", - "2024-12-14 16:07:08.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", - "2024-12-14 16:07:08.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", - "2024-12-14 16:07:08.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", - "2024-12-14 16:07:08.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.177 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.177 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.178 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006754\n", - "2024-12-14 16:07:08.178 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T16:07:08.172125.json\n", - "2024-12-14 16:07:08.179 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.180 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.179926. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.180 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.180 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.180 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.184 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", - "2024-12-14 16:07:08.184 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 16:07:08.184 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 16:07:08.184 
| INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:08.185 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.185 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.186 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006649\n", - "2024-12-14 16:07:08.186 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T16:07:08.179926.json\n", - "2024-12-14 16:07:08.187 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.187501. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.188 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", - "2024-12-14 16:07:08.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:08.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.193 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.197 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010020\n", - "2024-12-14 16:07:08.197 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T16:07:08.187501.json\n", - "2024-12-14 16:07:08.198 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.199 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.198888. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.199 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.199 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.199 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", - "2024-12-14 16:07:08.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.204 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", - "2024-12-14 16:07:08.205 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.205 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.206 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.206 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", - "2024-12-14 16:07:08.206 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.206 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.206 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.213 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.014441\n", - "2024-12-14 16:07:08.213 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T16:07:08.198888.json\n", - "2024-12-14 16:07:08.213 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.214 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.214164. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.214 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.214 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.214 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.218 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", - "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", - "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.219 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", - "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", - "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.219 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.222 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007988\n", - "2024-12-14 16:07:08.222 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T16:07:08.214164.json\n", - "2024-12-14 16:07:08.222 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:08.223 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.223318. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.223 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:08.223 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.223 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:08.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", - "2024-12-14 16:07:08.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", - "2024-12-14 16:07:08.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", - "2024-12-14 16:07:08.228 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.228 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.228 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 16:07:08.230 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006777\n", - "2024-12-14 16:07:08.230 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T16:07:08.223318.json\n", - "2024-12-14 16:07:08.230 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:08.230 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.230725. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.230 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:08.231 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.231 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:08.234 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", - "2024-12-14 16:07:08.234 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", - "2024-12-14 16:07:08.234 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:08.235 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source column 'col(result_value)' for event column text_value is always interpreted as a column name. 
Removing col() function call and setting source column to result_value.\n", - "2024-12-14 16:07:08.235 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", - "2024-12-14 16:07:08.235 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.235 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 16:07:08.237 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006615\n", - "2024-12-14 16:07:08.237 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T16:07:08.230725.json\n", - "2024-12-14 16:07:08.238 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:08.238 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.238859. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.239 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:08.239 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.239 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:08.244 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", - "2024-12-14 16:07:08.244 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.244 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.245 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.245 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", - "2024-12-14 16:07:08.245 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.245 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.245 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.245 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 16:07:08.257 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.017998\n", - "2024-12-14 16:07:08.257 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T16:07:08.238859.json\n", - "2024-12-14 16:07:08.260 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:08.262 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.261546. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.263 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:08.263 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.264 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:08.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", - "2024-12-14 16:07:08.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 16:07:08.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 16:07:08.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.281 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 16:07:08.286 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024554\n", - "2024-12-14 16:07:08.286 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T16:07:08.261546.json\n", - "2024-12-14 16:07:08.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:08.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.287021. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:08.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:08.291 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", - "2024-12-14 16:07:08.291 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:08.291 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", - "2024-12-14 16:07:08.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column 
charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.292 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 16:07:08.310 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.023385\n", - "2024-12-14 16:07:08.310 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T16:07:08.287021.json\n", - "2024-12-14 16:07:08.311 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:08.311 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.311468. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.311 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:08.311 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.312 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", - "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", - "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.316 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", - "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
admission_type\n", - "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_location\n", - "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", - "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", - "2024-12-14 16:07:08.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.318 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.318 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 16:07:08.323 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012409\n", - "2024-12-14 16:07:08.323 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T16:07:08.311468.json\n", - "2024-12-14 16:07:08.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:08.324 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.324714. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.324 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:08.325 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.325 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", - "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", - "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", - "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", - "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth 
in possible formats %Y\n", - "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", - "2024-12-14 16:07:08.330 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:08.330 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:08.330 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 16:07:08.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008220\n", - "2024-12-14 16:07:08.333 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T16:07:08.324714.json\n", - "2024-12-14 16:07:08.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:08.333 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.333867. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.334 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:08.334 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.334 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:08.338 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", - "2024-12-14 16:07:08.339 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:08.339 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.339 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.339 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.339 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 16:07:08.544 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.210303\n", - "2024-12-14 16:07:08.544 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T16:07:08.333867.json\n", - "2024-12-14 16:07:08.544 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:08.545 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.545351. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.545 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:08.545 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.546 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:08.550 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", - "2024-12-14 16:07:08.551 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:08.551 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.551 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.551 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.551 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:08.556 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011422\n", - "2024-12-14 16:07:08.556 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T16:07:08.545351.json\n", - "2024-12-14 16:07:08.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:08.558 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.557861. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.558 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:08.558 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:08.562 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", - "2024-12-14 16:07:08.562 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:08.562 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:08.563 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:08.563 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", - "2024-12-14 16:07:08.563 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 16:07:08.563 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 16:07:08.564 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 16:07:08.564 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 16:07:08.581 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.023592\n", - "2024-12-14 16:07:08.581 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T16:07:08.557861.json\n", - "2024-12-14 16:07:08.582 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:08.582 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.582313. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.582 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:08.582 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.582 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:08.586 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", - "2024-12-14 16:07:08.587 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", - "2024-12-14 16:07:08.587 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:08.587 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.587 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 16:07:08.588 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006400\n", - "2024-12-14 16:07:08.588 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T16:07:08.582313.json\n", - "2024-12-14 16:07:08.589 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.589 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.589478. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.589 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.589 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.589 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.593 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", - "2024-12-14 16:07:08.593 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", - "2024-12-14 16:07:08.594 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", - "2024-12-14 16:07:08.594 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", - "2024-12-14 16:07:08.594 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.594 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.594 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 16:07:08.596 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007142\n", - "2024-12-14 16:07:08.596 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T16:07:08.589478.json\n", - "2024-12-14 16:07:08.597 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.597 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.597686. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.597 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.598 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.598 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.602 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", - "2024-12-14 16:07:08.602 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 16:07:08.602 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 16:07:08.602 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 16:07:08.602 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.603 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 16:07:08.605 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007402\n", - "2024-12-14 16:07:08.605 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T16:07:08.597686.json\n", - "2024-12-14 16:07:08.605 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.606 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.606130. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.606 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.606 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.606 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.610 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", - "2024-12-14 16:07:08.611 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 16:07:08.611 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.611 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.611 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.611 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 16:07:08.642 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.036436\n", - "2024-12-14 16:07:08.642 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T16:07:08.606130.json\n", - "2024-12-14 16:07:08.643 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.644 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.644292. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.644 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.644 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.644 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.649 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", - "2024-12-14 16:07:08.649 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.649 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.650 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.650 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", - "2024-12-14 16:07:08.651 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 16:07:08.651 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.651 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.651 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", - "2024-12-14 16:07:08.652 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.652 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.652 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 16:07:08.676 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.032631\n", - "2024-12-14 16:07:08.677 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T16:07:08.644292.json\n", - "2024-12-14 16:07:08.678 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.678 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:08.678769. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:08.679 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.679 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:08.679 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", - "2024-12-14 16:07:08.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", - "2024-12-14 16:07:08.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.683 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", - "2024-12-14 16:07:08.684 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", - "2024-12-14 16:07:08.684 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 16:07:08.684 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 16:07:08.684 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 16:07:08.686 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008114\n", - "2024-12-14 16:07:08.686 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T16:07:08.678769.json\n", - "2024-12-14 16:07:08.687 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:823 - Subsharded into converted events.\n", + "2024-12-14 17:15:32.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.905 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:32.915 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.017034\n", + "2024-12-14 17:15:32.915 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T17:15:32.898624.json\n", + "2024-12-14 17:15:32.916 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:32.917 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.917013. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.917 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:32.917 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.917 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:32.921 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", + "2024-12-14 17:15:32.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:32.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", + "2024-12-14 17:15:32.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:32.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.923 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:32.926 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009541\n", + "2024-12-14 17:15:32.926 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T17:15:32.917013.json\n", + "2024-12-14 17:15:32.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:32.927 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.927549. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.927 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:32.927 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.928 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:32.931 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", + "2024-12-14 17:15:32.932 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:32.932 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:32.932 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:32.932 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", + "2024-12-14 17:15:32.933 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:32.933 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:32.933 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:32.933 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:32.940 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013041\n", + "2024-12-14 17:15:32.940 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T17:15:32.927549.json\n", + "2024-12-14 17:15:32.941 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:32.941 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.941739. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.942 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:32.942 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.942 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", + "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", + "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", + "2024-12-14 17:15:32.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
admission_location\n", + "2024-12-14 17:15:32.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_type\n", + "2024-12-14 17:15:32.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", + "2024-12-14 17:15:32.948 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", + "2024-12-14 17:15:32.948 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.948 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.948 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:32.955 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013267\n", + "2024-12-14 17:15:32.955 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T17:15:32.941739.json\n", + "2024-12-14 17:15:32.955 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:32.956 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.955912. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.956 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:32.956 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.956 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:32.973 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", + "2024-12-14 17:15:32.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", + "2024-12-14 17:15:32.976 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", + "2024-12-14 17:15:32.977 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.978 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - 
Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:32.981 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.025516\n", + "2024-12-14 17:15:32.981 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T17:15:32.955912.json\n", + "2024-12-14 17:15:32.983 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:32.986 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.984685. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:32.987 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:32.987 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:32.987 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:32.991 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", + "2024-12-14 17:15:32.992 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:32.992 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:32.992 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:32.992 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:32.992 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.040 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.056118\n", + "2024-12-14 17:15:33.040 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T17:15:32.984685.json\n", + "2024-12-14 17:15:33.041 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.041 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.041805. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.042 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.042 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.042 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.046 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", + "2024-12-14 17:15:33.046 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", + "2024-12-14 17:15:33.046 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 17:15:33.046 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 
- Source column 'col(result_value)' for event column text_value is always interpreted as a column name. Removing col() function call and setting source column to result_value.\n", + "2024-12-14 17:15:33.046 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", + "2024-12-14 17:15:33.047 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.047 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.049 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007480\n", + "2024-12-14 17:15:33.049 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T17:15:33.041805.json\n", + "2024-12-14 17:15:33.050 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.050 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.050716. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.051 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.051 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.055 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", + "2024-12-14 17:15:33.056 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.056 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:33.056 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.056 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.056 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008712\n", + "2024-12-14 17:15:33.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T17:15:33.050716.json\n", + "2024-12-14 17:15:33.060 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.060 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.060372. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.060 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.060 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.061 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.065 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", + "2024-12-14 17:15:33.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", + "2024-12-14 17:15:33.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:33.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column 
charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.066 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.071 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011269\n", + "2024-12-14 17:15:33.071 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T17:15:33.060372.json\n", + "2024-12-14 17:15:33.072 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:33.072 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.072660. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.072 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:33.072 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.073 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth in possible formats %Y\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - 
Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:33.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:33.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007929\n", + "2024-12-14 17:15:33.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T17:15:33.072660.json\n", + "2024-12-14 17:15:33.081 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:33.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.081784. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.082 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:33.082 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.082 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:33.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", + "2024-12-14 17:15:33.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 17:15:33.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 17:15:33.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 17:15:33.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.087 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:33.089 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007410\n", + "2024-12-14 17:15:33.089 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T17:15:33.081784.json\n", + "2024-12-14 17:15:33.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:33.090 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.090034. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.090 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:33.090 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.090 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:33.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", + "2024-12-14 17:15:33.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", + "2024-12-14 17:15:33.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 17:15:33.095 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.095 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:33.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006395\n", + "2024-12-14 17:15:33.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T17:15:33.090034.json\n", + "2024-12-14 17:15:33.096 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:33.097 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.097353. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.097 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:33.097 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.097 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:33.101 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", + "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", + "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", + "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", + "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.103 | 
INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:33.105 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007910\n", + "2024-12-14 17:15:33.105 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T17:15:33.097353.json\n", + "2024-12-14 17:15:33.105 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:33.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.106087. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:33.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.106 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:33.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", + "2024-12-14 17:15:33.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", + "2024-12-14 17:15:33.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", + "2024-12-14 17:15:33.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", + "2024-12-14 17:15:33.111 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.111 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:33.113 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006962\n", + "2024-12-14 17:15:33.113 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T17:15:33.106087.json\n", + "2024-12-14 17:15:33.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:33.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.114649. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:33.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.115 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:33.119 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", + "2024-12-14 17:15:33.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + 
"2024-12-14 17:15:33.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", + "2024-12-14 17:15:33.121 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.121 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.122 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.122 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", + "2024-12-14 17:15:33.122 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.122 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.122 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:33.134 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.020059\n", + "2024-12-14 17:15:33.134 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T17:15:33.114649.json\n", + "2024-12-14 17:15:33.135 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:33.135 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.135615. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.135 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:33.135 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.136 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:33.140 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 17:15:33.140 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 17:15:33.140 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 17:15:33.140 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.140 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.140 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:33.142 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007186\n", + "2024-12-14 17:15:33.142 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T17:15:33.135615.json\n", + "2024-12-14 17:15:33.143 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:33.143 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.143737. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.143 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:33.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.144 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:33.148 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 17:15:33.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:33.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.149 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:33.163 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.020243\n", + "2024-12-14 17:15:33.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T17:15:33.143737.json\n", + "2024-12-14 17:15:33.165 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:33.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.165607. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:33.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.166 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:33.170 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", + "2024-12-14 17:15:33.171 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.171 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.171 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.171 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", + "2024-12-14 17:15:33.172 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.172 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.172 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:33.175 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009748\n", + "2024-12-14 17:15:33.175 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T17:15:33.165607.json\n", + "2024-12-14 17:15:33.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:33.176 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.176289. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.176 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:33.176 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:33.180 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", + "2024-12-14 17:15:33.181 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:33.181 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:33.181 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:33.181 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", + "2024-12-14 17:15:33.181 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:33.181 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:33.182 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:33.182 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:33.190 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.014067\n", + "2024-12-14 17:15:33.190 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T17:15:33.176289.json\n", + "2024-12-14 17:15:33.191 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:33.191 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.191592. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.191 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:33.191 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", + "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", + "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", + "2024-12-14 17:15:33.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
admission_location\n", + "2024-12-14 17:15:33.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_type\n", + "2024-12-14 17:15:33.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", + "2024-12-14 17:15:33.198 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", + "2024-12-14 17:15:33.198 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.198 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.198 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:33.202 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011248\n", + "2024-12-14 17:15:33.202 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T17:15:33.191592.json\n", + "2024-12-14 17:15:33.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:33.203 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.203695. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.203 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:33.204 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:33.207 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", + "2024-12-14 17:15:33.208 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", + "2024-12-14 17:15:33.208 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", + "2024-12-14 17:15:33.208 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.208 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering 
out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.208 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:33.210 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006506\n", + "2024-12-14 17:15:33.210 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T17:15:33.203695.json\n", + "2024-12-14 17:15:33.210 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.211 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.211185. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.211 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.211 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.211 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.215 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", + "2024-12-14 17:15:33.216 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.216 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:33.216 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.216 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.216 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.286 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.075577\n", + "2024-12-14 17:15:33.286 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T17:15:33.211185.json\n", + "2024-12-14 17:15:33.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.287673. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.288 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.288 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.291 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", + "2024-12-14 17:15:33.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", + "2024-12-14 17:15:33.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 17:15:33.292 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - 
Source column 'col(result_value)' for event column text_value is always interpreted as a column name. Removing col() function call and setting source column to result_value.\n", + "2024-12-14 17:15:33.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", + "2024-12-14 17:15:33.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.292 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.294 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007073\n", + "2024-12-14 17:15:33.294 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T17:15:33.287673.json\n", + "2024-12-14 17:15:33.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.295 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.295797. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.296 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.296 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", + "2024-12-14 17:15:33.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:33.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.301 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.304 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008910\n", + "2024-12-14 17:15:33.304 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T17:15:33.295797.json\n", + "2024-12-14 17:15:33.305 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.305 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.305567. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.305 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.305 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.306 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.310 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", + "2024-12-14 17:15:33.311 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", + "2024-12-14 17:15:33.311 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:33.311 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column 
charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.311 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.311 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.318 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013312\n", + "2024-12-14 17:15:33.318 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T17:15:33.305567.json\n", + "2024-12-14 17:15:33.319 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:33.320 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.319931. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.320 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:33.320 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.320 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth in possible formats %Y\n", + "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", + "2024-12-14 17:15:33.325 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:33.325 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - 
Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:33.325 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 17:15:33.328 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008224\n", + "2024-12-14 17:15:33.328 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T17:15:33.319931.json\n", + "2024-12-14 17:15:33.328 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:33.329 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.329021. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.329 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:33.329 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:33.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", + "2024-12-14 17:15:33.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 17:15:33.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 17:15:33.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 17:15:33.334 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.334 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 17:15:33.336 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006998\n", + "2024-12-14 17:15:33.336 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T17:15:33.329021.json\n", + "2024-12-14 17:15:33.336 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:33.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.337131. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:33.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.337 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:33.341 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", + "2024-12-14 17:15:33.342 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", + "2024-12-14 17:15:33.342 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 17:15:33.342 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.342 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 17:15:33.344 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007560\n", + "2024-12-14 17:15:33.344 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T17:15:33.337131.json\n", + "2024-12-14 17:15:33.345 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:33.345 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.345648. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.345 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:33.345 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.346 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:33.350 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", + "2024-12-14 17:15:33.350 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", + "2024-12-14 17:15:33.350 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.350 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.350 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", + "2024-12-14 17:15:33.351 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", + "2024-12-14 17:15:33.351 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.351 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.352 | 
INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 17:15:33.363 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.017417\n", + "2024-12-14 17:15:33.363 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T17:15:33.345648.json\n", + "2024-12-14 17:15:33.366 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:33.369 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.368406. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.371 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:33.371 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.373 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:33.386 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", + "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", + "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", + "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", + "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 17:15:33.390 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.022108\n", + "2024-12-14 17:15:33.390 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T17:15:33.368406.json\n", + "2024-12-14 17:15:33.391 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:33.392 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.392174. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.392 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:33.392 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.392 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:33.397 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", + "2024-12-14 17:15:33.398 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.398 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + 
"2024-12-14 17:15:33.398 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.398 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", + "2024-12-14 17:15:33.399 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.399 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.400 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.400 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", + "2024-12-14 17:15:33.400 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.401 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.401 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 17:15:33.420 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.028113\n", + "2024-12-14 17:15:33.420 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T17:15:33.392174.json\n", + "2024-12-14 17:15:33.421 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:33.421 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.421362. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.421 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:33.421 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.422 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:33.426 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 17:15:33.426 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 17:15:33.426 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 17:15:33.426 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.426 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.426 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 17:15:33.429 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008179\n", + "2024-12-14 17:15:33.429 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T17:15:33.421362.json\n", + "2024-12-14 17:15:33.430 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:33.430 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.430538. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.430 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:33.430 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.431 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:33.436 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 17:15:33.436 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.436 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:33.436 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.437 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.437 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 17:15:33.458 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.028130\n", + "2024-12-14 17:15:33.458 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T17:15:33.430538.json\n", + "2024-12-14 17:15:33.459 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:33.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.460250. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:33.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.460 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:33.465 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", + "2024-12-14 17:15:33.466 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.466 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.466 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.466 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", + "2024-12-14 17:15:33.466 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.467 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.467 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.467 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 17:15:33.470 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010316\n", + "2024-12-14 17:15:33.470 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T17:15:33.460250.json\n", + "2024-12-14 17:15:33.471 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:33.471 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.471585. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.471 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:33.471 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.472 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:33.476 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", + "2024-12-14 17:15:33.476 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:33.476 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:33.476 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:33.477 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", + "2024-12-14 17:15:33.477 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:33.477 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 17:15:33.477 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 17:15:33.477 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 17:15:33.490 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.019048\n", + "2024-12-14 17:15:33.490 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T17:15:33.471585.json\n", + "2024-12-14 17:15:33.491 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:33.491 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.491829. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.492 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:33.492 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.492 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", + "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", + "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", + "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
admission_location\n", + "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_type\n", + "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", + "2024-12-14 17:15:33.498 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", + "2024-12-14 17:15:33.498 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.498 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.498 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 17:15:33.503 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011900\n", + "2024-12-14 17:15:33.503 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T17:15:33.491829.json\n", + "2024-12-14 17:15:33.504 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:33.504 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.504429. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.504 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:33.504 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.504 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:33.508 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", + "2024-12-14 17:15:33.508 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", + "2024-12-14 17:15:33.508 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", + "2024-12-14 17:15:33.508 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.509 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering 
out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.509 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 17:15:33.510 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006418\n", + "2024-12-14 17:15:33.510 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T17:15:33.504429.json\n", + "2024-12-14 17:15:33.511 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.511 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.511839. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.512 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.512 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.516 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", + "2024-12-14 17:15:33.516 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.516 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:33.516 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.517 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.517 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 17:15:33.649 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.137863\n", + "2024-12-14 17:15:33.649 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T17:15:33.511839.json\n", + "2024-12-14 17:15:33.650 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.650 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.650624. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.650 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.651 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.651 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", + "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", + "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 17:15:33.655 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source 
column 'col(result_value)' for event column text_value is always interpreted as a column name. Removing col() function call and setting source column to result_value.\n", + "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", + "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 17:15:33.658 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007397\n", + "2024-12-14 17:15:33.658 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T17:15:33.650624.json\n", + "2024-12-14 17:15:33.658 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.659 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.659087. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.659 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.659 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.659 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.663 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", + "2024-12-14 17:15:33.665 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 17:15:33.665 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 17:15:33.665 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.665 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.665 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 17:15:33.669 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010815\n", + "2024-12-14 17:15:33.669 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T17:15:33.659087.json\n", + "2024-12-14 17:15:33.670 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.670 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.670832. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:33.671 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.671 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:33.671 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", + "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", + "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column 
charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 17:15:33.688 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.018080\n", + "2024-12-14 17:15:33.689 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T17:15:33.670832.json\n", + "2024-12-14 17:15:33.689 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:823 - Subsharded into converted events.\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:08.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: merge_to_MEDS_cohort\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:08.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-merge_to_MEDS_cohort --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=merge_to_MEDS_cohort\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:10.205\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:15:33.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: merge_to_MEDS_cohort\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:33.769\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-merge_to_MEDS_cohort --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=merge_to_MEDS_cohort\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:35.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:10.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:07:09.354 | INFO | MEDS_transforms.utils:stage_init:73 - Running merge_to_MEDS_cohort with the following configuration:\n", + "\u001b[32m2024-12-14 17:15:35.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:15:34.367 | INFO | MEDS_transforms.utils:stage_init:73 - Running merge_to_MEDS_cohort with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", @@ -2546,9 +2534,9 @@ " n_subjects_per_shard: 1000\n", " external_splits_json_fp: null\n", " split_fracs:\n", - " train: 0.8\n", - " tuning: 0.1\n", - " held_out: 0.1\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", " convert_to_sharded_events:\n", " do_dedup_text_and_numeric: true\n", " merge_to_MEDS_cohort:\n", @@ -2564,10 +2552,6 @@ " do_retype: true\n", " finalize_MEDS_data:\n", " do_retype: true\n", - " split_fracs:\n", - " train: 0.5\n", - " tuning: 0.25\n", - " held_out: 0.25\n", "worker: 0\n", "polling_time: 300\n", "stage: merge_to_MEDS_cohort\n", @@ -2590,7 +2574,7 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:07:09.370 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:15:34.382 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "unique_by: '*'\n", "additional_sort_by: null\n", "is_metadata: false\n", @@ -2603,11 +2587,11 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", - "2024-12-14 16:07:09.385 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator_by_shard_map:687 - Mapping computation over a maximum of 3 shards\n", - "2024-12-14 16:07:09.393 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", - "2024-12-14 16:07:09.394 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock 
at 2024-12-14 16:07:09.393828. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:09.395 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0\n", - "2024-12-14 16:07:09.396 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", + "2024-12-14 17:15:34.397 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator_by_shard_map:687 - Mapping computation over a maximum of 3 shards\n", + "2024-12-14 17:15:34.404 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 17:15:34.405 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:34.405491. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:34.407 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0\n", + "2024-12-14 17:15:34.408 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", @@ -2624,14 +2608,14 @@ " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:09.405 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:09.405 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", - "2024-12-14 16:07:09.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.066957\n", - "2024-12-14 16:07:09.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache/locks/2024-12-14T16:07:09.393828.json\n", - "2024-12-14 16:07:09.461 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 16:07:09.461 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:09.461748. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:09.462 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0\n", - "2024-12-14 16:07:09.463 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", + "2024-12-14 17:15:34.417 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:34.417 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 17:15:34.536 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.130660\n", + "2024-12-14 17:15:34.536 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache/locks/2024-12-14T17:15:34.405491.json\n", + "2024-12-14 17:15:34.536 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 17:15:34.537 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:34.537340. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:34.537 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0\n", + "2024-12-14 17:15:34.538 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", @@ -2648,14 +2632,14 @@ " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:09.464 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:09.465 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 16:07:09.512 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.050432\n", - "2024-12-14 16:07:09.512 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache/locks/2024-12-14T16:07:09.461748.json\n", - "2024-12-14 16:07:09.512 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 16:07:09.513 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:09.513190. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:09.513 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0\n", - "2024-12-14 16:07:09.514 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", + "2024-12-14 17:15:34.540 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:34.540 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 17:15:34.716 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.178790\n", + "2024-12-14 17:15:34.716 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache/locks/2024-12-14T17:15:34.537340.json\n", + "2024-12-14 17:15:34.716 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 17:15:34.717 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:34.717118. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:34.717 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0\n", + "2024-12-14 17:15:34.718 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", @@ -2672,18 +2656,18 @@ " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 16:07:09.516 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:09.516 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 16:07:10.123 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.610197\n", - "2024-12-14 16:07:10.123 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache, but 
clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache/locks/2024-12-14T16:07:09.513190.json\n", - "2024-12-14 16:07:10.123 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.753831\n", + "2024-12-14 17:15:34.720 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:34.720 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 17:15:35.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.364595\n", + "2024-12-14 17:15:35.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache/locks/2024-12-14T17:15:34.717118.json\n", + "2024-12-14 17:15:35.082 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.699477\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:10.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: extract_code_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:10.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-extract_code_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=extract_code_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 
16:07:11.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:15:35.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: extract_code_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:35.164\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-extract_code_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=extract_code_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:36.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:11.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:07:10.771 | INFO | MEDS_transforms.utils:stage_init:73 - Running extract_code_metadata with the following configuration:\n", + "\u001b[32m2024-12-14 17:15:36.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:15:35.785 | INFO | MEDS_transforms.utils:stage_init:73 - Running extract_code_metadata with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", @@ -2712,9 +2696,9 @@ " n_subjects_per_shard: 1000\n", " external_splits_json_fp: null\n", " split_fracs:\n", - " train: 0.8\n", - " tuning: 0.1\n", - " held_out: 0.1\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", " convert_to_sharded_events:\n", " do_dedup_text_and_numeric: true\n", " merge_to_MEDS_cohort:\n", @@ -2730,10 +2714,6 @@ " do_retype: true\n", " finalize_MEDS_data:\n", " do_retype: true\n", - " split_fracs:\n", - " train: 0.5\n", - " tuning: 0.25\n", - " held_out: 0.25\n", "worker: 0\n", "polling_time: 300\n", "stage: extract_code_metadata\n", @@ -2756,7 +2736,7 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:07:10.787 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:15:35.802 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "is_metadata: true\n", "description_separator: '\n", "\n", @@ -2771,8 +2751,8 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", - "2024-12-14 16:07:10.788 | INFO | MEDS_transforms.extract.extract_code_metadata:main:359 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml\n", - "2024-12-14 16:07:10.819 | INFO | MEDS_transforms.extract.extract_code_metadata:main:361 - Event conversion config:\n", + "2024-12-14 17:15:35.803 | INFO | MEDS_transforms.extract.extract_code_metadata:main:359 - Reading event conversion config from 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml\n", + "2024-12-14 17:15:35.835 | INFO | MEDS_transforms.extract.extract_code_metadata:main:361 - Event conversion config:\n", "subject_id_col: subject_id\n", "hosp/admissions:\n", " ed_registration:\n", @@ -3095,107 +3075,107 @@ " valueuom: unitname\n", " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", "\n", - "2024-12-14 16:07:10.871 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet\n", - "2024-12-14 16:07:10.871 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_diagnoses.parquet\n", - "2024-12-14 16:07:10.872 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:10.872428. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:10.874 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet\n", - "2024-12-14 16:07:10.874 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:35.887 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet\n", + "2024-12-14 17:15:35.887 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_procedures.parquet\n", + "2024-12-14 17:15:35.889 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:35.888743. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:35.890 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet\n", + "2024-12-14 17:15:35.890 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/extract_code_metadata.py:184: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " if metadata_df.schema[mandatory_col] is not mandatory_type:\n", - "2024-12-14 16:07:10.880 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. 
Casting.\n", - "2024-12-14 16:07:10.881 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:10.881 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_diagnoses.parquet\n", - "2024-12-14 16:07:10.932 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.059924\n", - "2024-12-14 16:07:10.932 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_diagnoses.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_diagnoses.parquet_cache/locks/2024-12-14T16:07:10.872428.json\n", - "2024-12-14 16:07:10.933 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv\n", - "2024-12-14 16:07:10.933 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_hcpcs.parquet\n", - "2024-12-14 16:07:10.933 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:10.933340. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:10.933 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv\n", - "2024-12-14 16:07:10.934 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:10.936 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:10.936 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_hcpcs.parquet\n", - "2024-12-14 16:07:10.967 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.034517\n", - "2024-12-14 16:07:10.967 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_hcpcs.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_hcpcs.parquet_cache/locks/2024-12-14T16:07:10.933340.json\n", - "2024-12-14 16:07:10.968 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv\n", - "2024-12-14 16:07:10.968 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/d_labitems_to_loinc.parquet\n", - "2024-12-14 16:07:10.969 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 
16:07:10.969008. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:10.969 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv\n", - "2024-12-14 16:07:10.969 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:10.972 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:10.972 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:10.973 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:10.974 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", - "2024-12-14 16:07:10.974 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/d_labitems_to_loinc.parquet\n", - "2024-12-14 16:07:10.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.027039\n", - "2024-12-14 16:07:10.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.d_labitems_to_loinc.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.d_labitems_to_loinc.parquet_cache/locks/2024-12-14T16:07:10.969008.json\n", - "2024-12-14 16:07:10.996 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet\n", - "2024-12-14 16:07:10.996 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_procedures.parquet\n", - "2024-12-14 16:07:10.997 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:10.997044. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:10.997 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet\n", - "2024-12-14 16:07:10.997 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:11.001 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:11.001 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:11.001 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_procedures.parquet\n", - "2024-12-14 16:07:11.028 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.031820\n", - "2024-12-14 16:07:11.028 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_procedures.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_procedures.parquet_cache/locks/2024-12-14T16:07:10.997044.json\n", - "2024-12-14 16:07:11.029 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv\n", - "2024-12-14 16:07:11.029 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/inputevents_to_rxnorm.parquet\n", - "2024-12-14 16:07:11.029 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:11.029852. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:11.030 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv\n", - "2024-12-14 16:07:11.030 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:11.032 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:11.032 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:11.033 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:11.033 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", - "2024-12-14 16:07:11.033 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/inputevents_to_rxnorm.parquet\n", - "2024-12-14 16:07:11.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.014199\n", - "2024-12-14 16:07:11.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.inputevents_to_rxnorm.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.inputevents_to_rxnorm.parquet_cache/locks/2024-12-14T16:07:11.029852.json\n", - "2024-12-14 16:07:11.044 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv\n", - "2024-12-14 16:07:11.044 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_itemid.parquet\n", - "2024-12-14 16:07:11.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:11.045049. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:11.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv\n", - "2024-12-14 16:07:11.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:11.047 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:11.047 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:11.048 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:11.048 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", - "2024-12-14 16:07:11.048 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_itemid.parquet\n", - "2024-12-14 16:07:11.056 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011186\n", - "2024-12-14 16:07:11.056 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_itemid.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_itemid.parquet_cache/locks/2024-12-14T16:07:11.045049.json\n", - "2024-12-14 16:07:11.056 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv\n", - "2024-12-14 16:07:11.056 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/outputevents_to_loinc.parquet\n", - "2024-12-14 16:07:11.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:11.057011. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:11.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv\n", - "2024-12-14 16:07:11.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:11.059 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:11.059 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:11.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/outputevents_to_loinc.parquet\n", - "2024-12-14 16:07:11.061 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.004929\n", - "2024-12-14 16:07:11.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.outputevents_to_loinc.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.outputevents_to_loinc.parquet_cache/locks/2024-12-14T16:07:11.057011.json\n", - "2024-12-14 16:07:11.062 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv\n", - "2024-12-14 16:07:11.062 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_datetimeevents.parquet\n", - "2024-12-14 16:07:11.063 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:11.062871. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:11.063 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv\n", - "2024-12-14 16:07:11.063 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:11.065 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:11.065 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 16:07:11.066 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 16:07:11.066 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", - "2024-12-14 16:07:11.066 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_datetimeevents.parquet\n", - "2024-12-14 16:07:11.072 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009717\n", - "2024-12-14 16:07:11.072 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_datetimeevents.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_datetimeevents.parquet_cache/locks/2024-12-14T16:07:11.062871.json\n", - "2024-12-14 16:07:11.072 | INFO | MEDS_transforms.extract.extract_code_metadata:main:398 - Extracted metadata for all events. Merging.\n", - "2024-12-14 16:07:11.072 | INFO | MEDS_transforms.extract.extract_code_metadata:main:404 - Starting reduction process\n", - "2024-12-14 16:07:11.072 | INFO | MEDS_transforms.extract.extract_code_metadata:main:412 - All map shards complete! Starting code metadata reduction computation.\n", - "2024-12-14 16:07:11.089 | INFO | MEDS_transforms.extract.extract_code_metadata:main:424 - Collected metadata for 2661 unique codes among 42898 total observations.\n", - "2024-12-14 16:07:11.124 | INFO | MEDS_transforms.extract.extract_code_metadata:main:449 - Finished reduction in 0:00:00.051941\n", + "2024-12-14 17:15:35.898 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:35.899 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", + "2024-12-14 17:15:35.899 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_procedures.parquet\n", + "2024-12-14 17:15:35.946 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.057475\n", + "2024-12-14 17:15:35.946 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_procedures.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_procedures.parquet_cache/locks/2024-12-14T17:15:35.888743.json\n", + "2024-12-14 17:15:35.947 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv\n", + "2024-12-14 17:15:35.947 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/inputevents_to_rxnorm.parquet\n", + "2024-12-14 17:15:35.947 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:35.947489. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:35.947 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv\n", + "2024-12-14 17:15:35.948 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:35.951 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:35.951 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:35.953 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:35.953 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", + "2024-12-14 17:15:35.953 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/inputevents_to_rxnorm.parquet\n", + "2024-12-14 17:15:35.970 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.022885\n", + "2024-12-14 17:15:35.970 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.inputevents_to_rxnorm.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.inputevents_to_rxnorm.parquet_cache/locks/2024-12-14T17:15:35.947489.json\n", + "2024-12-14 17:15:35.971 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv\n", + "2024-12-14 17:15:35.971 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_datetimeevents.parquet\n", + "2024-12-14 17:15:35.971 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:35.971762. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:35.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv\n", + "2024-12-14 17:15:35.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:35.973 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:35.974 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:35.975 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:35.975 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", + "2024-12-14 17:15:35.975 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_datetimeevents.parquet\n", + "2024-12-14 17:15:35.982 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010768\n", + "2024-12-14 17:15:35.982 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_datetimeevents.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_datetimeevents.parquet_cache/locks/2024-12-14T17:15:35.971762.json\n", + "2024-12-14 17:15:35.983 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv\n", + "2024-12-14 17:15:35.983 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_hcpcs.parquet\n", + "2024-12-14 17:15:35.983 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:35.983359. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:35.983 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv\n", + "2024-12-14 17:15:35.983 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:35.985 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:35.985 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_hcpcs.parquet\n", + "2024-12-14 17:15:36.015 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.032007\n", + "2024-12-14 17:15:36.015 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_hcpcs.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_hcpcs.parquet_cache/locks/2024-12-14T17:15:35.983359.json\n", + "2024-12-14 17:15:36.016 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet\n", + "2024-12-14 17:15:36.016 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_diagnoses.parquet\n", + "2024-12-14 17:15:36.016 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 
17:15:36.016392. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:36.016 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet\n", + "2024-12-14 17:15:36.016 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:36.019 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:36.019 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:36.019 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_diagnoses.parquet\n", + "2024-12-14 17:15:36.052 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.035722\n", + "2024-12-14 17:15:36.052 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_diagnoses.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_diagnoses.parquet_cache/locks/2024-12-14T17:15:36.016392.json\n", + "2024-12-14 17:15:36.053 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv\n", + "2024-12-14 17:15:36.053 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/d_labitems_to_loinc.parquet\n", + "2024-12-14 17:15:36.053 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:36.053647. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:36.053 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv\n", + "2024-12-14 17:15:36.054 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:36.056 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:36.057 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:36.058 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:36.058 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", + "2024-12-14 17:15:36.058 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/d_labitems_to_loinc.parquet\n", + "2024-12-14 17:15:36.077 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.023464\n", + "2024-12-14 17:15:36.077 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.d_labitems_to_loinc.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.d_labitems_to_loinc.parquet_cache/locks/2024-12-14T17:15:36.053647.json\n", + "2024-12-14 17:15:36.077 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv\n", + "2024-12-14 17:15:36.077 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/outputevents_to_loinc.parquet\n", + "2024-12-14 17:15:36.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:36.078096. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:36.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv\n", + "2024-12-14 17:15:36.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:36.080 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:36.080 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:36.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/outputevents_to_loinc.parquet\n", + "2024-12-14 17:15:36.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005380\n", + "2024-12-14 17:15:36.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.outputevents_to_loinc.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.outputevents_to_loinc.parquet_cache/locks/2024-12-14T17:15:36.078096.json\n", + "2024-12-14 17:15:36.084 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv\n", + "2024-12-14 17:15:36.084 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_itemid.parquet\n", + "2024-12-14 17:15:36.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:36.084421. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:36.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv\n", + "2024-12-14 17:15:36.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:36.086 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:36.086 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 17:15:36.088 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 17:15:36.088 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", + "2024-12-14 17:15:36.088 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_itemid.parquet\n", + "2024-12-14 17:15:36.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011606\n", + "2024-12-14 17:15:36.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_itemid.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_itemid.parquet_cache/locks/2024-12-14T17:15:36.084421.json\n", + "2024-12-14 17:15:36.096 | INFO | MEDS_transforms.extract.extract_code_metadata:main:398 - Extracted metadata for all events. Merging.\n", + "2024-12-14 17:15:36.096 | INFO | MEDS_transforms.extract.extract_code_metadata:main:404 - Starting reduction process\n", + "2024-12-14 17:15:36.096 | INFO | MEDS_transforms.extract.extract_code_metadata:main:412 - All map shards complete! 
Starting code metadata reduction computation.\n", + "2024-12-14 17:15:36.113 | INFO | MEDS_transforms.extract.extract_code_metadata:main:424 - Collected metadata for 2661 unique codes among 42898 total observations.\n", + "2024-12-14 17:15:36.151 | INFO | MEDS_transforms.extract.extract_code_metadata:main:449 - Finished reduction in 0:00:00.054648\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:11.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:11.221\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:12.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:15:36.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:36.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:37.150\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:12.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:07:11.807 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_metadata with the following configuration:\n", + "\u001b[32m2024-12-14 17:15:37.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:15:36.869 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_metadata with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -3224,9 +3204,9 @@ " n_subjects_per_shard: 1000\n", " external_splits_json_fp: null\n", " split_fracs:\n", - " train: 0.8\n", - " tuning: 0.1\n", - " held_out: 0.1\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", " convert_to_sharded_events:\n", " do_dedup_text_and_numeric: true\n", " merge_to_MEDS_cohort:\n", @@ -3242,10 +3222,6 @@ " do_retype: true\n", " finalize_MEDS_data:\n", " do_retype: true\n", - " split_fracs:\n", - " train: 0.5\n", - " tuning: 0.25\n", - " held_out: 0.25\n", "worker: 0\n", "polling_time: 300\n", "stage: finalize_MEDS_metadata\n", @@ -3268,7 +3244,7 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:07:11.825 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:15:36.888 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "is_metadata: true\n", "do_retype: true\n", 
"data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", @@ -3281,23 +3257,23 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/finalize_MEDS_metadata\n", " - metadata_input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", - "2024-12-14 16:07:11.830 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:170 - Validating code metadata\n", - "2024-12-14 16:07:11.830 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:173 - Reading code metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/codes.parquet\n", - "2024-12-14 16:07:12.007 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:183 - Writing finalized metadata df to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/codes.parquet\n", - "2024-12-14 16:07:12.016 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:187 - Creating dataset metadata\n", - "2024-12-14 16:07:12.019 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:199 - Writing finalized dataset metadata to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/dataset.json\n", - "2024-12-14 16:07:12.019 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:204 - Creating subject splits from {str(shards_map_fp.resolve())}\n", - "2024-12-14 16:07:12.019 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split train has 80 subjects\n", - "2024-12-14 16:07:12.020 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split tuning has 10 subjects\n", - "2024-12-14 16:07:12.020 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split held_out has 10 subjects\n", - "2024-12-14 16:07:12.022 | INFO | 
MEDS_transforms.extract.finalize_MEDS_metadata:main:222 - Writing finalized subject splits to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/subject_splits.parquet\n", + "2024-12-14 17:15:36.892 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:170 - Validating code metadata\n", + "2024-12-14 17:15:36.892 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:173 - Reading code metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/codes.parquet\n", + "2024-12-14 17:15:37.079 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:183 - Writing finalized metadata df to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/codes.parquet\n", + "2024-12-14 17:15:37.091 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:187 - Creating dataset metadata\n", + "2024-12-14 17:15:37.093 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:199 - Writing finalized dataset metadata to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/dataset.json\n", + "2024-12-14 17:15:37.094 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:204 - Creating subject splits from {str(shards_map_fp.resolve())}\n", + "2024-12-14 17:15:37.094 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split train has 50 subjects\n", + "2024-12-14 17:15:37.094 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split tuning has 25 subjects\n", + "2024-12-14 17:15:37.094 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split held_out has 25 subjects\n", + "2024-12-14 17:15:37.096 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:222 - Writing finalized subject splits to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/subject_splits.parquet\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:12.073\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_data\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:12.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_data --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_data\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:13.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:15:37.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_data\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:37.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_data --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_data\u001b[0m\n", + "\u001b[32m2024-12-14 17:15:38.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:07:13.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:07:12.623 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_data with the following 
configuration:\n", + "\u001b[32m2024-12-14 17:15:38.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:15:37.714 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_data with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -3326,9 +3302,9 @@ " n_subjects_per_shard: 1000\n", " external_splits_json_fp: null\n", " split_fracs:\n", - " train: 0.8\n", - " tuning: 0.1\n", - " held_out: 0.1\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", " convert_to_sharded_events:\n", " do_dedup_text_and_numeric: true\n", " merge_to_MEDS_cohort:\n", @@ -3344,10 +3320,6 @@ " do_retype: true\n", " finalize_MEDS_data:\n", " do_retype: true\n", - " split_fracs:\n", - " train: 0.5\n", - " tuning: 0.25\n", - " held_out: 0.25\n", "worker: 0\n", "polling_time: 300\n", "stage: finalize_MEDS_data\n", @@ -3370,7 +3342,7 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:07:12.640 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:15:37.732 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "do_retype: true\n", "is_metadata: false\n", "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", @@ -3382,29 +3354,29 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", " - metadata_input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", - "2024-12-14 16:07:12.658 
| INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", - "2024-12-14 16:07:12.667 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\n", - "2024-12-14 16:07:12.668 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:12.668203. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:12.669 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", - "2024-12-14 16:07:12.670 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:12.762 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\n", - "2024-12-14 16:07:12.777 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.109013\n", - "2024-12-14 16:07:12.777 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/.0.parquet_cache/locks/2024-12-14T16:07:12.668203.json\n", - "2024-12-14 16:07:12.777 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\n", - "2024-12-14 16:07:12.778 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:12.777977. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:12.778 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 16:07:12.778 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:12.783 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\n", - "2024-12-14 16:07:12.802 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024486\n", - "2024-12-14 16:07:12.802 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/.0.parquet_cache/locks/2024-12-14T16:07:12.777977.json\n", - "2024-12-14 16:07:12.802 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\n", - "2024-12-14 16:07:12.803 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:07:12.803208. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:07:12.803 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 16:07:12.803 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:07:12.858 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\n", - "2024-12-14 16:07:13.051 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.248693\n", - "2024-12-14 16:07:13.052 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/.0.parquet_cache/locks/2024-12-14T16:07:12.803208.json\n", - "2024-12-14 16:07:13.052 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.411314\n", + "2024-12-14 17:15:37.749 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", + "2024-12-14 17:15:37.758 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\n", + "2024-12-14 17:15:37.759 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:37.759430. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:37.760 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 17:15:37.761 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:37.862 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\n", + "2024-12-14 17:15:37.899 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.140464\n", + "2024-12-14 17:15:37.900 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/.0.parquet_cache/locks/2024-12-14T17:15:37.759430.json\n", + "2024-12-14 17:15:37.900 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\n", + "2024-12-14 17:15:37.900 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:37.900755. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:37.901 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 17:15:37.901 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:37.915 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\n", + "2024-12-14 17:15:37.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.078518\n", + "2024-12-14 17:15:37.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/.0.parquet_cache/locks/2024-12-14T17:15:37.900755.json\n", + "2024-12-14 17:15:37.979 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\n", + "2024-12-14 17:15:37.980 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:37.980261. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:15:37.980 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 17:15:37.980 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:15:38.009 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\n", + "2024-12-14 17:15:38.139 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.159248\n", + "2024-12-14 17:15:38.139 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/.0.parquet_cache/locks/2024-12-14T17:15:37.980261.json\n", + "2024-12-14 17:15:38.139 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.407567\n", "\u001b[0m\n" ] } @@ -3424,7 +3396,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 43, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3444,7 +3416,7 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (916_166, 4)
subject_idtimecodenumeric_value
i64datetime[μs]strf32
10012853null"GENDER//F"null
100128532084-01-01 00:00:00"MEDS_BIRTH"null
100128532175-04-05 00:00:00"Height (Inches)"null
100128532175-04-05 00:00:00"Weight (Lbs)"null
100128532175-04-05 00:00:00"BMI (kg/m2)"null
100398312116-01-09 11:00:00"LAB//50983//mEq/L"134.0
100398312116-01-09 11:00:00"LAB//50882//mEq/L"25.0
100398312116-01-09 11:00:00"LAB//50863//IU/L"112.0
100398312116-01-09 11:00:00"LAB//51237//UNK"3.3
100398312116-01-09 11:00:00"LAB//51274//sec"33.0
" + "shape: (916_166, 4)
subject_idtimecodenumeric_value
i64datetime[μs]strf32
10001725null"GENDER//F"null
100017252064-01-01 00:00:00"MEDS_BIRTH"null
100017252109-10-15 00:00:00"Weight (Lbs)"null
100017252109-10-15 00:00:00"Blood Pressure"null
100017252110-01-04 00:00:00"Weight (Lbs)"null
100397082145-01-04 11:47:00"LAB//50934//UNK"33.0
100397082145-01-04 11:47:00"LAB//50868//mEq/L"15.0
100397082145-01-04 12:24:00"LAB//51274//sec"13.4
100397082145-01-04 12:24:00"LAB//51237//UNK"1.2
100397082145-01-04 12:24:00"LAB//51275//sec"29.700001
" ], "text/plain": [ "shape: (916_166, 4)\n", @@ -3453,21 +3425,21 @@ "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ i64 ┆ datetime[μs] ┆ str ┆ f32 │\n", "╞════════════╪═════════════════════╪═══════════════════╪═══════════════╡\n", - "│ 10012853 ┆ null ┆ GENDER//F ┆ null │\n", - "│ 10012853 ┆ 2084-01-01 00:00:00 ┆ MEDS_BIRTH ┆ null │\n", - "│ 10012853 ┆ 2175-04-05 00:00:00 ┆ Height (Inches) ┆ null │\n", - "│ 10012853 ┆ 2175-04-05 00:00:00 ┆ Weight (Lbs) ┆ null │\n", - "│ 10012853 ┆ 2175-04-05 00:00:00 ┆ BMI (kg/m2) ┆ null │\n", + "│ 10001725 ┆ null ┆ GENDER//F ┆ null │\n", + "│ 10001725 ┆ 2064-01-01 00:00:00 ┆ MEDS_BIRTH ┆ null │\n", + "│ 10001725 ┆ 2109-10-15 00:00:00 ┆ Weight (Lbs) ┆ null │\n", + "│ 10001725 ┆ 2109-10-15 00:00:00 ┆ Blood Pressure ┆ null │\n", + "│ 10001725 ┆ 2110-01-04 00:00:00 ┆ Weight (Lbs) ┆ null │\n", "│ … ┆ … ┆ … ┆ … │\n", - "│ 10039831 ┆ 2116-01-09 11:00:00 ┆ LAB//50983//mEq/L ┆ 134.0 │\n", - "│ 10039831 ┆ 2116-01-09 11:00:00 ┆ LAB//50882//mEq/L ┆ 25.0 │\n", - "│ 10039831 ┆ 2116-01-09 11:00:00 ┆ LAB//50863//IU/L ┆ 112.0 │\n", - "│ 10039831 ┆ 2116-01-09 11:00:00 ┆ LAB//51237//UNK ┆ 3.3 │\n", - "│ 10039831 ┆ 2116-01-09 11:00:00 ┆ LAB//51274//sec ┆ 33.0 │\n", + "│ 10039708 ┆ 2145-01-04 11:47:00 ┆ LAB//50934//UNK ┆ 33.0 │\n", + "│ 10039708 ┆ 2145-01-04 11:47:00 ┆ LAB//50868//mEq/L ┆ 15.0 │\n", + "│ 10039708 ┆ 2145-01-04 12:24:00 ┆ LAB//51274//sec ┆ 13.4 │\n", + "│ 10039708 ┆ 2145-01-04 12:24:00 ┆ LAB//51237//UNK ┆ 1.2 │\n", + "│ 10039708 ┆ 2145-01-04 12:24:00 ┆ LAB//51275//sec ┆ 29.700001 │\n", "└────────────┴─────────────────────┴───────────────────┴───────────────┘" ] }, - "execution_count": 21, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -3483,7 +3455,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 44, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3511,30 +3483,30 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (734, 2)
codecount
stru32
"DIAGNOSIS//ICD//10//E785"57
"DIAGNOSIS//ICD//10//E039"47
"DIAGNOSIS//ICD//10//Z794"37
"DIAGNOSIS//ICD//10//Z87891"35
"DIAGNOSIS//ICD//10//I2510"33
"DIAGNOSIS//ICD//10//M4856XA"1
"DIAGNOSIS//ICD//10//Z800"1
"DIAGNOSIS//ICD//10//M720"1
"DIAGNOSIS//ICD//10//Y92121"1
"DIAGNOSIS//ICD//10//T383X1A"1
" + "shape: (734, 2)
codecount
stru32
"DIAGNOSIS//ICD//10//E785"57
"DIAGNOSIS//ICD//10//E039"47
"DIAGNOSIS//ICD//10//Z794"37
"DIAGNOSIS//ICD//10//Z87891"35
"DIAGNOSIS//ICD//10//I2510"33
"DIAGNOSIS//ICD//10//Z792"1
"DIAGNOSIS//ICD//10//E041"1
"DIAGNOSIS//ICD//10//Z4502"1
"DIAGNOSIS//ICD//10//I2699"1
"DIAGNOSIS//ICD//10//J942"1
" ], "text/plain": [ "shape: (734, 2)\n", - "┌─────────────────────────────┬───────┐\n", - "│ code ┆ count │\n", - "│ --- ┆ --- │\n", - "│ str ┆ u32 │\n", - "╞═════════════════════════════╪═══════╡\n", - "│ DIAGNOSIS//ICD//10//E785 ┆ 57 │\n", - "│ DIAGNOSIS//ICD//10//E039 ┆ 47 │\n", - "│ DIAGNOSIS//ICD//10//Z794 ┆ 37 │\n", - "│ DIAGNOSIS//ICD//10//Z87891 ┆ 35 │\n", - "│ DIAGNOSIS//ICD//10//I2510 ┆ 33 │\n", - "│ … ┆ … │\n", - "│ DIAGNOSIS//ICD//10//M4856XA ┆ 1 │\n", - "│ DIAGNOSIS//ICD//10//Z800 ┆ 1 │\n", - "│ DIAGNOSIS//ICD//10//M720 ┆ 1 │\n", - "│ DIAGNOSIS//ICD//10//Y92121 ┆ 1 │\n", - "│ DIAGNOSIS//ICD//10//T383X1A ┆ 1 │\n", - "└─────────────────────────────┴───────┘" + "┌────────────────────────────┬───────┐\n", + "│ code ┆ count │\n", + "│ --- ┆ --- │\n", + "│ str ┆ u32 │\n", + "╞════════════════════════════╪═══════╡\n", + "│ DIAGNOSIS//ICD//10//E785 ┆ 57 │\n", + "│ DIAGNOSIS//ICD//10//E039 ┆ 47 │\n", + "│ DIAGNOSIS//ICD//10//Z794 ┆ 37 │\n", + "│ DIAGNOSIS//ICD//10//Z87891 ┆ 35 │\n", + "│ DIAGNOSIS//ICD//10//I2510 ┆ 33 │\n", + "│ … ┆ … │\n", + "│ DIAGNOSIS//ICD//10//Z792 ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//E041 ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//Z4502 ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//I2699 ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//J942 ┆ 1 │\n", + "└────────────────────────────┴───────┘" ] }, - "execution_count": 22, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -3549,7 +3521,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 45, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3569,45 +3541,45 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (2_661, 6)
codedescriptionparent_codespossibly_cpt_codeitemidvalueuom
strstrlist[str]list[str]list[str]list[str]
"DIAGNOSIS//ICD//9//7916""Acetonuria"["ICD9CM/791.6"][null][null][null]
"INFUSION_START//229654""amiodarone Injection"["RxNorm/1663223"][null]["229654"][null]
"DIAGNOSIS//ICD//9//82021""Closed fracture of intertrocha…["ICD9CM/820.21"][null][null][null]
"DIAGNOSIS//ICD//10//S22068A""Other fracture of T7-T8 thorac…["ICD10CM/S22.068A"][null][null][null]
"DIAGNOSIS//ICD//9//9585""Traumatic anuria"["ICD9CM/958.5"][null][null][null]
"LAB//51307//UNK""CD13 cells/100 cells in Bone m…["LOINC/51237-6"][null]["51307"][null]
"PROCEDURE//ICD//9//3722""Left heart cardiac catheteriza…["ICD9Proc/37.22"][null][null][null]
"DIAGNOSIS//ICD//9//53560""Duodenitis, without mention of…["ICD9CM/535.60"][null][null][null]
"PROCEDURE//ICD//9//9671""Continuous invasive mechanical…["ICD9Proc/96.71"][null][null][null]
"DIAGNOSIS//ICD//10//Y92129""Unspecified place in nursing h…["ICD10CM/Y92.129"][null][null][null]
" + "shape: (2_661, 6)
codedescriptionparent_codesitemidpossibly_cpt_codevalueuom
strstrlist[str]list[str]list[str]list[str]
"DIAGNOSIS//ICD//10//E873""Alkalosis"["ICD10CM/E87.3"][null][null][null]
"DIAGNOSIS//ICD//10//Z85810""Personal history of malignant …["ICD10CM/Z85.810"][null][null][null]
"LAB//50883//mg/dL""Bilirubin.direct [Mass/volume]…["LOINC/1968-7"]["50883"][null]["mg/dL"]
"LAB//51000//mg/dL""Triglyceride [Mass/volume] in …["LOINC/2571-8"]["51000"][null]["mg/dL"]
"INFUSION_END//227531""mannitol Injection"["RxNorm/1791383"]["227531"][null][null]
"LAB//51501//#/hpf""Transitional cells [#/area] in…["LOINC/30089-7"]["51501"][null]["#/hpf"]
"DIAGNOSIS//ICD//9//V600""Lack of housing"["ICD9CM/V60.0"][null][null][null]
"PROCEDURE//START//225400""Bronchoscopy"["SNOMED/10847001"]["225400"][null][null]
"LAB//50991//ng/mL""Thyroglobulin [Mass/volume] in…["LOINC/3013-0"]["50991"][null]["ng/mL"]
"LAB//51296//N/A""Dacrocytes [Presence] in Blood…["LOINC/7791-7"]["51296"][null]["N/A"]
" ], "text/plain": [ "shape: (2_661, 6)\n", - "┌──────────────────┬──────────────────┬─────────────────┬─────────────────┬────────────┬───────────┐\n", - "│ code ┆ description ┆ parent_codes ┆ possibly_cpt_co ┆ itemid ┆ valueuom │\n", - "│ --- ┆ --- ┆ --- ┆ de ┆ --- ┆ --- │\n", - "│ str ┆ str ┆ list[str] ┆ --- ┆ list[str] ┆ list[str] │\n", - "│ ┆ ┆ ┆ list[str] ┆ ┆ │\n", - "╞══════════════════╪══════════════════╪═════════════════╪═════════════════╪════════════╪═══════════╡\n", - "│ DIAGNOSIS//ICD// ┆ Acetonuria ┆ [\"ICD9CM/791.6\" ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 9//7916 ┆ ┆ ] ┆ ┆ ┆ │\n", - "│ INFUSION_START// ┆ amiodarone ┆ [\"RxNorm/166322 ┆ [null] ┆ [\"229654\"] ┆ [null] │\n", - "│ 229654 ┆ Injection ┆ 3\"] ┆ ┆ ┆ │\n", - "│ DIAGNOSIS//ICD// ┆ Closed fracture ┆ [\"ICD9CM/820.21 ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 9//82021 ┆ of intertrocha… ┆ \"] ┆ ┆ ┆ │\n", - "│ DIAGNOSIS//ICD// ┆ Other fracture ┆ [\"ICD10CM/S22.0 ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 10//S22068A ┆ of T7-T8 thorac… ┆ 68A\"] ┆ ┆ ┆ │\n", - "│ DIAGNOSIS//ICD// ┆ Traumatic anuria ┆ [\"ICD9CM/958.5\" ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 9//9585 ┆ ┆ ] ┆ ┆ ┆ │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ LAB//51307//UNK ┆ CD13 cells/100 ┆ [\"LOINC/51237-6 ┆ [null] ┆ [\"51307\"] ┆ [null] │\n", - "│ ┆ cells in Bone m… ┆ \"] ┆ ┆ ┆ │\n", - "│ PROCEDURE//ICD// ┆ Left heart ┆ [\"ICD9Proc/37.2 ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 9//3722 ┆ cardiac ┆ 2\"] ┆ ┆ ┆ │\n", - "│ ┆ catheteriza… ┆ ┆ ┆ ┆ │\n", - "│ DIAGNOSIS//ICD// ┆ Duodenitis, ┆ [\"ICD9CM/535.60 ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 9//53560 ┆ without mention ┆ \"] ┆ ┆ ┆ │\n", - "│ ┆ of… ┆ ┆ ┆ ┆ │\n", - "│ PROCEDURE//ICD// ┆ Continuous ┆ [\"ICD9Proc/96.7 ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 9//9671 ┆ invasive ┆ 1\"] ┆ ┆ ┆ │\n", - "│ ┆ mechanical… ┆ ┆ ┆ ┆ │\n", - "│ DIAGNOSIS//ICD// ┆ Unspecified ┆ [\"ICD10CM/Y92.1 ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 10//Y92129 ┆ place in nursing ┆ 29\"] ┆ ┆ ┆ │\n", - "│ ┆ h… ┆ ┆ ┆ ┆ │\n", - 
"└──────────────────┴──────────────────┴─────────────────┴─────────────────┴────────────┴───────────┘" + "┌──────────────────┬──────────────────┬─────────────────┬────────────┬─────────────────┬───────────┐\n", + "│ code ┆ description ┆ parent_codes ┆ itemid ┆ possibly_cpt_co ┆ valueuom │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ de ┆ --- │\n", + "│ str ┆ str ┆ list[str] ┆ list[str] ┆ --- ┆ list[str] │\n", + "│ ┆ ┆ ┆ ┆ list[str] ┆ │\n", + "╞══════════════════╪══════════════════╪═════════════════╪════════════╪═════════════════╪═══════════╡\n", + "│ DIAGNOSIS//ICD// ┆ Alkalosis ┆ [\"ICD10CM/E87.3 ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 10//E873 ┆ ┆ \"] ┆ ┆ ┆ │\n", + "│ DIAGNOSIS//ICD// ┆ Personal history ┆ [\"ICD10CM/Z85.8 ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 10//Z85810 ┆ of malignant … ┆ 10\"] ┆ ┆ ┆ │\n", + "│ LAB//50883//mg/d ┆ Bilirubin.direct ┆ [\"LOINC/1968-7\" ┆ [\"50883\"] ┆ [null] ┆ [\"mg/dL\"] │\n", + "│ L ┆ [Mass/volume]… ┆ ] ┆ ┆ ┆ │\n", + "│ LAB//51000//mg/d ┆ Triglyceride ┆ [\"LOINC/2571-8\" ┆ [\"51000\"] ┆ [null] ┆ [\"mg/dL\"] │\n", + "│ L ┆ [Mass/volume] in ┆ ] ┆ ┆ ┆ │\n", + "│ ┆ … ┆ ┆ ┆ ┆ │\n", + "│ INFUSION_END//22 ┆ mannitol ┆ [\"RxNorm/179138 ┆ [\"227531\"] ┆ [null] ┆ [null] │\n", + "│ 7531 ┆ Injection ┆ 3\"] ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ LAB//51501//#/hp ┆ Transitional ┆ [\"LOINC/30089-7 ┆ [\"51501\"] ┆ [null] ┆ [\"#/hpf\"] │\n", + "│ f ┆ cells [#/area] ┆ \"] ┆ ┆ ┆ │\n", + "│ ┆ in… ┆ ┆ ┆ ┆ │\n", + "│ DIAGNOSIS//ICD// ┆ Lack of housing ┆ [\"ICD9CM/V60.0\" ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 9//V600 ┆ ┆ ] ┆ ┆ ┆ │\n", + "│ PROCEDURE//START ┆ Bronchoscopy ┆ [\"SNOMED/108470 ┆ [\"225400\"] ┆ [null] ┆ [null] │\n", + "│ //225400 ┆ ┆ 01\"] ┆ ┆ ┆ │\n", + "│ LAB//50991//ng/m ┆ Thyroglobulin ┆ [\"LOINC/3013-0\" ┆ [\"50991\"] ┆ [null] ┆ [\"ng/mL\"] │\n", + "│ L ┆ [Mass/volume] ┆ ] ┆ ┆ ┆ │\n", + "│ ┆ in… ┆ ┆ ┆ ┆ │\n", + "│ LAB//51296//N/A ┆ Dacrocytes ┆ [\"LOINC/7791-7\" ┆ [\"51296\"] ┆ [null] ┆ [\"N/A\"] │\n", + "│ ┆ [Presence] in ┆ ] ┆ ┆ ┆ │\n", + 
"│ ┆ Blood… ┆ ┆ ┆ ┆ │\n", + "└──────────────────┴──────────────────┴─────────────────┴────────────┴─────────────────┴───────────┘" ] }, - "execution_count": 23, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -3619,7 +3591,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -3645,9 +3617,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cloning into '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo//tmp'...\n", + "remote: Enumerating objects: 144, done.\u001b[K\n", + "remote: Counting objects: 100% (144/144), done.\u001b[K\n", + "remote: Compressing objects: 100% (129/129), done.\u001b[K\n", + "remote: Total 144 (delta 22), reused 70 (delta 7), pack-reused 0 (from 0)\u001b[K\n", + "Receiving objects: 100% (144/144), 211.41 KiB | 478.00 KiB/s, done.\n", + "Resolving deltas: 100% (22/22), done.\n" + ] + } + ], "source": [ "# Download pre-meds script, event config (defining how raw data is converted to meds data), and meds-transform config\n", "!mkdir {ROOT_DIR}/meds-transform/\n", @@ -3661,7 +3647,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ @@ -3670,7 +3656,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3688,8 +3674,8 @@ "Additionally, consider reducing N_PARALLEL_WORKERS if > 1\n", "Skipping unzipping.\n", "Running pre-MEDS conversion.\n", - "\u001b[32m2024-12-14 16:18:06.881\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m278\u001b[0m - \u001b[1mLoading table preprocessors from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/table_preprocessors.yaml...\u001b[0m\n", - 
"\u001b[32m2024-12-14 16:18:06.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for admissionDx:\n", + "\u001b[32m2024-12-14 17:20:07.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m278\u001b[0m - \u001b[1mLoading table preprocessors from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/table_preprocessors.yaml...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for admissionDx:\n", "offset_col: admitdxenteredoffset\n", "pseudotime_col: admitDxEnteredTimestamp\n", "output_data_cols:\n", @@ -3699,7 +3685,7 @@ "- How should we use `admitdxtest`?\n", "- How should we use `admitdxpath`?\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for allergy:\n", + "\u001b[32m2024-12-14 17:20:07.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for allergy:\n", "offset_col: allergyenteredoffset\n", "pseudotime_col: allergyEnteredTimestamp\n", "output_data_cols:\n", @@ -3712,21 +3698,21 @@ "- Is `drugName` the name of the drug to which the patient is allergic or the drug\n", " given to the patient (docs say 'name of the selected admission drug')?\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanGeneral:\n", + "\u001b[32m2024-12-14 17:20:07.091\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanGeneral:\n", "offset_col: cplitemoffset\n", "pseudotime_col: carePlanGeneralItemEnteredTimestamp\n", "output_data_cols:\n", "- cplgroup\n", "- cplitemvalue\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanEOL:\n", + "\u001b[32m2024-12-14 17:20:07.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanEOL:\n", "offset_col: cpleoldiscussionoffset\n", "pseudotime_col: carePlanEolDiscussionOccurredTimestamp\n", "warning_items:\n", "- Is the DiscussionOffset time actually reliable? Should we fall back on the SaveOffset\n", " time?\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanGoal:\n", + "\u001b[32m2024-12-14 17:20:07.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanGoal:\n", "offset_col: cplgoaloffset\n", "pseudotime_col: carePlanGoalEnteredTimestamp\n", "output_data_cols:\n", @@ -3734,7 +3720,7 @@ "- cplgoalvalue\n", "- cplgoalstatus\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanInfectiousDisease:\n", + "\u001b[32m2024-12-14 17:20:07.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanInfectiousDisease:\n", "offset_col: cplinfectdiseaseoffset\n", 
"pseudotime_col: carePlanInfectDiseaseEnteredTimestamp\n", "output_data_cols:\n", @@ -3743,7 +3729,7 @@ "- responsetotherapy\n", "- treatment\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for diagnosis:\n", + "\u001b[32m2024-12-14 17:20:07.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for diagnosis:\n", "offset_col: diagnosisoffset\n", "pseudotime_col: diagnosisEnteredTimestamp\n", "output_data_cols:\n", @@ -3754,7 +3740,7 @@ "- Though we use it, the `diagnosisString` field documentation is unclear -- by what\n", " is it separated?\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for infusionDrug:\n", + "\u001b[32m2024-12-14 17:20:07.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for infusionDrug:\n", "offset_col: infusionoffset\n", "pseudotime_col: infusionEnteredTimestamp\n", "output_data_cols:\n", @@ -3766,7 +3752,7 @@ "- volumeoffluid\n", "- patientweight\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for lab:\n", + "\u001b[32m2024-12-14 17:20:07.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for lab:\n", "offset_col: labresultoffset\n", "pseudotime_col: labResultDrawnTimestamp\n", "output_data_cols:\n", @@ -3780,7 +3766,7 @@ "- Is this the time the lab was drawn? Entered? 
The time the result came in?\n", "- We **IGNORE** the `labResultRevisedOffset` column -- this may be a mistake!\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for medication:\n", + "\u001b[32m2024-12-14 17:20:07.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for medication:\n", "offset_col:\n", "- drugorderoffset\n", "- drugstartoffset\n", @@ -3803,7 +3789,7 @@ "warning_items:\n", "- We **IGNORE** the `drugOrderCancelled` column -- this may be a mistake!\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseAssessment:\n", + "\u001b[32m2024-12-14 17:20:07.094\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseAssessment:\n", "offset_col:\n", "- nurseassessoffset\n", "- nurseassessentryoffset\n", @@ -3819,7 +3805,7 @@ "- Should we be using `cellAttributePath` instead of `cellAttribute`?\n", "- SOME MAY BE LISTS\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseCare:\n", + "\u001b[32m2024-12-14 17:20:07.094\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseCare:\n", "offset_col:\n", "- nursecareoffset\n", "- nursecareentryoffset\n", @@ -3835,7 +3821,7 @@ "- Should we be using `cellAttributePath` instead of `cellAttribute`?\n", "- SOME MAY BE LISTS\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 
16:18:06.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseCharting:\n", + "\u001b[32m2024-12-14 17:20:07.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseCharting:\n", "offset_col:\n", "- nursingchartoffset\n", "- nursingchartentryoffset\n", @@ -3851,7 +3837,7 @@ "warning_items:\n", "- SOME MAY BE LISTS\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for pastHistory:\n", + "\u001b[32m2024-12-14 17:20:07.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for pastHistory:\n", "offset_col:\n", "- pasthistoryoffset\n", "- pasthistoryenteredoffset\n", @@ -3869,7 +3855,7 @@ "- How should we use `pastHistoryPath` vs. `pastHistoryNoteType`?\n", "- How should we use `pastHistoryValue` vs. `pastHistoryValueText`?\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for physicalExam:\n", + "\u001b[32m2024-12-14 17:20:07.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for physicalExam:\n", "offset_col: physicalexamoffset\n", "pseudotime_col: physicalExamEnteredTimestamp\n", "output_data_cols:\n", @@ -3881,7 +3867,7 @@ "- How should we use `physicalExamValue` vs. `physicalExamText`?\n", "- I believe the `physicalExamValue` is a **LIST**. 
This must be processed specially.\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for respiratoryCare:\n", + "\u001b[32m2024-12-14 17:20:07.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for respiratoryCare:\n", "offset_col:\n", "- respcarestatusoffset\n", "- ventstartoffset\n", @@ -3924,7 +3910,7 @@ "- We might be able to use `priorVent` timestamps to further refine true season of\n", " unit admission.\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for respiratoryCharting:\n", + "\u001b[32m2024-12-14 17:20:07.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for respiratoryCharting:\n", "offset_col:\n", "- respchartoffset\n", "- respchartentryoffset\n", @@ -3939,7 +3925,7 @@ "warning_items:\n", "- SOME MAY BE LISTS\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for treatment:\n", + "\u001b[32m2024-12-14 17:20:07.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for treatment:\n", "offset_col: treatmentoffset\n", "pseudotime_col: treatmentEnteredTimestamp\n", "output_data_cols:\n", @@ -3948,7 +3934,7 @@ "warning_items:\n", "- Absence of entries in table do not indicate absence of treatments\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.908\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for vitalAperiodic:\n", + "\u001b[32m2024-12-14 17:20:07.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for vitalAperiodic:\n", "offset_col: observationoffset\n", "pseudotime_col: observationEnteredTimestamp\n", "output_data_cols:\n", @@ -3964,7 +3950,7 @@ "- pvr\n", "- pvri\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for vitalPeriodic:\n", + "\u001b[32m2024-12-14 17:20:07.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for vitalPeriodic:\n", "offset_col: observationoffset\n", "pseudotime_col: observationEnteredTimestamp\n", "output_data_cols:\n", @@ -3988,48 +3974,3141 @@ "warning_items:\n", "- These are 5-minute median values. There are going to be a *lot* of events.\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:06.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mReloading processed patient df from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet\u001b[0m\n", - "Done with diagnosis. Continuing\n", - "Done with vitalAperiodic. Continuing\n", - "Done with admissionDx. Continuing\n", - "Done with respiratoryCare. Continuing\n", - "Done with nurseAssessment. Continuing\n", - "Done with vitalPeriodic. Continuing\n", - "Done with carePlanGeneral. Continuing\n", - "Done with carePlanGoal. Continuing\n", - "Done with treatment. 
Continuing\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apacheApsVar as it is not supported in this pipeline.\u001b[0m\n", - "Done with carePlanEOL. Continuing\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m331\u001b[0m - \u001b[33m\u001b[1mNo function needed for infusiondrug. For eICU, THIS IS UNEXPECTED\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping carePlanCareProvider as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping microLab as it is not supported in this pipeline.\u001b[0m\n", - "Done with nurseCare. Continuing\n", - "Done with physicalExam. Continuing\n", - "Done with respiratoryCharting. Continuing\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping note as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping admissiondrug as it is not supported in this pipeline.\u001b[0m\n", - "Done with lab. 
Continuing\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apachePredVar as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping customLab as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.013\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apachePatientResult as it is not supported in this pipeline.\u001b[0m\n", - "Done with carePlanInfectiousDisease. Continuing\n", - "Done with allergy. Continuing\n", - "Done with nurseCharting. Continuing\n", - "Done with pastHistory. Continuing\n", - "Done with medication. Continuing\n", - "\u001b[32m2024-12-14 16:18:07.014\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping intakeOutput as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m352\u001b[0m - \u001b[1mDone! 
All dataframes processed and written to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m294\u001b[0m - \u001b[1mProcessing patient table first...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.099\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m298\u001b[0m - \u001b[1mLoading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/hospital.csv.gz...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.105\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m302\u001b[0m - \u001b[1mLoading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/patient.csv.gz...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m305\u001b[0m - \u001b[1mProcessing patient table...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_patient\u001b[0m:\u001b[36m101\u001b[0m - \u001b[1mChecking that the 24h times are consistent. 
If this is extremely slow, consider refactoring to have only one `.collect()` call.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_timestamps_agree\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mChecking that stated 24h times are consistent given offsets between {pseudotime_col.name} and hospitaldischargetime24...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_timestamps_agree\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mChecking that stated 24h times are consistent given offsets between {pseudotime_col.name} and hospitaladmittime24...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_timestamps_agree\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mChecking that stated 24h times are consistent given offsets between {pseudotime_col.name} and unitadmittime24...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.132\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_timestamps_agree\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mChecking that stated 24h times are consistent given offsets between {pseudotime_col.name} and unitdischargetime24...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_patient\u001b[0m:\u001b[36m109\u001b[0m - \u001b[1mValidated 24h times in 0:00:00.017329\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.135\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_patient\u001b[0m:\u001b[36m111\u001b[0m - \u001b[33m\u001b[1mNOT validating the `unitVisitNumber` column as that isn't implemented yet.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.135\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_patient\u001b[0m:\u001b[36m113\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING. Check with the eICU team:\n", + " - `apacheAdmissionDx` is not selected from the patients table as we grab it from `admissiondx`. Is this right?\n", + " - `admissionHeight` and `admissionWeight` are interpreted as **unit** admission height/weight, not hospital admission height/weight. Is this right?\n", + " - `age` is interpreted as the age at the time of the unit stay, not the hospital stay. Is this right?\n", + " - `What is the actual mean age for those > 89? Here we assume 90.\n", + " - Note that all the column names appear to be all in lowercase for the csv versions, vs. the docs\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.163\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing diagnosis...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.184\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/diagnosis.csv.gz in 0:00:00.021635\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.184\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for diagnosis table. 
Check with the eICU team:\n", + " - Though we use it, the `diagnosisString` field documentation is unclear -- by what is it separated?\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet in 0:00:00.038430\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing vitalAperiodic...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/vitalAperiodic.csv.gz in 0:00:00.158846\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet in 0:00:00.228263\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing admissionDx...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/admissionDx.csv.gz in 0:00:00.007296\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.437\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT 
THE FOLLOWING for admissionDx table. Check with the eICU team:\n", + " - How should we use `admitdxtest`?\n", + " - How should we use `admitdxpath`?\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet in 0:00:00.019188\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing respiratoryCare...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/respiratoryCare.csv.gz in 0:00:00.009363\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.459\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for respiratoryCare table. 
Check with the eICU team:\n", + " - We ignore the `priorVent*` columns -- this may be a mistake!\n", + " - There is a lot of data in this table -- what should be incorporated into the event structure?\n", + " - We might be able to use `priorVent` timestamps to further refine true season of unit admission.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet in 0:00:00.022688\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing nurseAssessment...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.560\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/nurseAssessment.csv.gz in 0:00:00.087465\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.560\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for nurseAssessment table. 
Check with the eICU team:\n", + " - Should we be using `cellAttributePath` instead of `cellAttribute`?\n", + " - SOME MAY BE LISTS\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet in 0:00:00.120698\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:07.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing vitalPeriodic...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:08.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/vitalPeriodic.csv.gz in 0:00:01.244370\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:08.838\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for vitalPeriodic table. Check with the eICU team:\n", + " - These are 5-minute median values. 
There are going to be a *lot* of events.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet in 0:00:01.629149\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing carePlanGeneral...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.244\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/carePlanGeneral.csv.gz in 0:00:00.021348\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.260\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet in 0:00:00.037551\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.260\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing carePlanGoal...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/carePlanGoal.csv.gz in 0:00:00.003226\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet in 
0:00:00.012718\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing treatment...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/treatment.csv.gz in 0:00:00.023153\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.296\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for treatment table. Check with the eICU team:\n", + " - Absence of entries in table do not indicate absence of treatments\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet in 0:00:00.046999\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.320\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apacheApsVar as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing carePlanEOL...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/carePlanEOL.csv.gz in 0:00:00.000273\u001b[0m\n", + "\u001b[32m2024-12-14 
17:20:09.321\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for carePlanEOL table. Check with the eICU team:\n", + " - Is the DiscussionOffset time actually reliable? Should we fall back on the SaveOffset time?\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet in 0:00:00.008632\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.329\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m331\u001b[0m - \u001b[33m\u001b[1mNo function needed for infusiondrug. For eICU, THIS IS UNEXPECTED\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.329\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping carePlanCareProvider as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.329\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping microLab as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing nurseCare...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/nurseCare.csv.gz in 0:00:00.038466\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.368\u001b[0m 
| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for nurseCare table. Check with the eICU team:\n", + " - Should we be using `cellAttributePath` instead of `cellAttribute`?\n", + " - SOME MAY BE LISTS\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.387\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet in 0:00:00.057232\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.387\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing physicalExam...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/physicalExam.csv.gz in 0:00:00.062587\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.450\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for physicalExam table. Check with the eICU team:\n", + " - How should we use `physicalExamValue` vs. `physicalExamText`?\n", + " - I believe the `physicalExamValue` is a **LIST**. 
This must be processed specially.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet in 0:00:00.096029\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing respiratoryCharting...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/respiratoryCharting.csv.gz in 0:00:00.116960\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.600\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for respiratoryCharting table. 
Check with the eICU team:\n", + " - SOME MAY BE LISTS\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet in 0:00:00.173772\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.657\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping note as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.657\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping admissiondrug as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:09.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing lab...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/lab.csv.gz in 0:00:00.387661\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.045\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for lab table. Check with the eICU team:\n", + " - Is this the time the lab was drawn? Entered? 
The time the result came in?\n", + " - We **IGNORE** the `labResultRevisedOffset` column -- this may be a mistake!\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet in 0:00:00.485708\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.143\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apachePredVar as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.143\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping customLab as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.143\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apachePatientResult as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing carePlanInfectiousDisease...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/carePlanInfectiousDisease.csv.gz in 0:00:00.000544\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet in 0:00:00.009958\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing allergy...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/allergy.csv.gz in 0:00:00.003649\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.157\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for allergy table. Check with the eICU team:\n", + " - How should we use `allergyNoteType`?\n", + " - How should we use `specialtyType`?\n", + " - How should we use `userType`?\n", + " - Is `drugName` the name of the drug to which the patient is allergic or the drug given to the patient (docs say 'name of the selected admission drug')?\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet in 0:00:00.011618\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:10.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing nurseCharting...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/nurseCharting.csv.gz 
in 0:00:01.167813\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.334\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for nurseCharting table. Check with the eICU team:\n", + " - SOME MAY BE LISTS\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet in 0:00:01.540825\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.707\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing pastHistory...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.719\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/pastHistory.csv.gz in 0:00:00.012306\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.719\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for pastHistory table. Check with the eICU team:\n", + " - SOME MAY BE LISTS\n", + " - How should we use `pastHistoryPath` vs. `pastHistoryNoteType`?\n", + " - How should we use `pastHistoryValue` vs. 
`pastHistoryValueText`?\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet in 0:00:00.027972\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing medication...\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.859\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/medication.csv.gz in 0:00:00.124266\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.859\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for medication table. Check with the eICU team:\n", + " - We **IGNORE** the `drugOrderCancelled` column -- this may be a mistake!\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet in 0:00:00.168091\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.903\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping intakeOutput as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:11.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m352\u001b[0m - \u001b[1mDone! 
All dataframes processed and written to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds\u001b[0m\n", "Setting N_WORKERS to 1 to avoid issues with the runners.\n", "Running extraction pipeline.\n", - "\u001b[32m2024-12-14 16:18:07.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m324\u001b[0m - \u001b[1mSkipping stage shard_events as it is already complete.\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m324\u001b[0m - \u001b[1mSkipping stage split_and_shard_subjects as it is already complete.\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m324\u001b[0m - \u001b[1mSkipping stage convert_to_sharded_events as it is already complete.\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: merge_to_MEDS_cohort\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:07.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-merge_to_MEDS_cohort --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=merge_to_MEDS_cohort\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:53.120\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:20:12.455\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: shard_events\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:12.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-shard_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=shard_events\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:14.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:14.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:20:13.013 | INFO | MEDS_transforms.extract.shard_events:main:330 - Running with config:\n", + "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", + "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- shard_events\n", + "- split_and_shard_subjects\n", + "- convert_to_sharded_events\n", + "- merge_to_MEDS_cohort\n", + "- finalize_MEDS_metadata\n", + "- finalize_MEDS_data\n", + "stage_configs:\n", + " shard_events:\n", + " row_chunksize: 200000000\n", + " infer_schema_length: 999999999\n", + " data_input_dir: ${input_dir}\n", + " split_and_shard_subjects:\n", + " is_metadata: true\n", + " output_dir: ${cohort_dir}/metadata\n", + " n_subjects_per_shard: 10000\n", + " external_splits_json_fp: null\n", + " split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + " convert_to_sharded_events:\n", + " do_dedup_text_and_numeric: true\n", + " merge_to_MEDS_cohort:\n", + " unique_by: null\n", + " additional_sort_by: null\n", + " extract_code_metadata:\n", + " is_metadata: true\n", + " description_separator: '\n", + "\n", + " '\n", + " finalize_MEDS_metadata:\n", + " is_metadata: true\n", + " do_retype: true\n", + " finalize_MEDS_data:\n", + " do_retype: true\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: shard_events\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: eICU\n", + " dataset_version: 2.0\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: extract\n", + "description: \"This pipeline extracts the eICU dataset in longitudinal, sparse form\\\n", + " \\ from an input dataset meeting\\nselect criteria and converts them to the flattened,\\\n", + " \\ MEDS format. 
You can control the key arguments to this\\npipeline by setting environment\\\n", + " \\ variables:\\n```bash\\n export EVENT_CONVERSION_CONFIG_FP=# Path to your event\\\n", + " \\ conversion config\\n export EICU_PRE_MEDS_DIR=# Path to the output dir of the\\\n", + " \\ pre-MEDS step\\n export EICU_MEDS_COHORT_DIR=# Path to where you want the dataset\\\n", + " \\ to live\\n```\"\n", + "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", + "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", + "\n", + "Stage: shard_events\n", + "\n", + "Stage config:\n", + "row_chunksize: 200000000\n", + "infer_schema_length: 999999999\n", + "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo//pre_meds/\n", + "is_metadata: false\n", + "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/metadata\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", + "reducer_output_dir: null\n", + "\n", + "2024-12-14 17:20:13.017 | INFO | MEDS_transforms.extract.shard_events:main:342 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/event_configs.yaml to identify needed columns.\n", + "2024-12-14 17:20:13.069 | INFO | MEDS_transforms.extract.shard_events:main:368 - Starting event sub-sharding. 
Sub-sharding 20 files:\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet\n", + " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet\n", + " * 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet\n", + "2024-12-14 17:20:13.071 | INFO | MEDS_transforms.extract.shard_events:main:372 - Will read raw data from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/$IN_FILE.parquet and write sub-sharded data to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/$IN_FILE/$ROW_START-$ROW_END.parquet\n", + "2024-12-14 17:20:13.073 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL.\n", + "2024-12-14 17:20:13.073 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet to determine row count.\n", + "2024-12-14 17:20:13.075 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.076 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.076 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanEolDiscussionOccurredTimestamp', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:13.082 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanEolDiscussionOccurredTimestamp, patienthealthsystemstayid\n", + "2024-12-14 17:20:13.088 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 15 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet.\n", + "2024-12-14 17:20:13.088 | INFO 
| MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:13.088 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet row-chunk [0-15) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet.\n", + "2024-12-14 17:20:13.091 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.091462. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:13.093 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet\n", + "2024-12-14 17:20:13.093 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.093 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.093 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanEolDiscussionOccurredTimestamp', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:13.093 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanEolDiscussionOccurredTimestamp, patienthealthsystemstayid\n", + "2024-12-14 17:20:13.093 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:13.094 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:13.101 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010042\n", + "2024-12-14 17:20:13.101 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/.[0-15).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/.[0-15).parquet_cache/locks/2024-12-14T17:20:13.091462.json\n", + "2024-12-14 17:20:13.104 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting.\n", + "2024-12-14 17:20:13.104 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet to determine row count.\n", + "2024-12-14 17:20:13.106 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.106 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.106 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['nursingChartEnteredTimestamp', 'nursingChartPerformedTimestamp', 'nursingchartcelltypecat', 'nursingchartcelltypevallabel', 'nursingchartcelltypevalname', 'nursingchartid', 'nursingchartvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:13.107 | DEBUG | 
MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, nursingChartEnteredTimestamp, nursingChartPerformedTimestamp, nursingchartcelltypecat, nursingchartcelltypevallabel, nursingchartcelltypevalname, nursingchartid, nursingchartvalue, patienthealthsystemstayid\n", + "2024-12-14 17:20:13.118 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1477163 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet.\n", + "2024-12-14 17:20:13.118 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:13.118 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet row-chunk [0-1477163) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet.\n", + "2024-12-14 17:20:13.120 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.120700. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:13.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet\n", + "2024-12-14 17:20:13.121 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.121 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.121 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['nursingChartEnteredTimestamp', 'nursingChartPerformedTimestamp', 'nursingchartcelltypecat', 'nursingchartcelltypevallabel', 'nursingchartcelltypevalname', 'nursingchartid', 'nursingchartvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:13.121 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, nursingChartEnteredTimestamp, nursingChartPerformedTimestamp, nursingchartcelltypecat, nursingchartcelltypevallabel, nursingchartcelltypevalname, nursingchartid, nursingchartvalue, patienthealthsystemstayid\n", + "2024-12-14 17:20:13.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:13.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:13.501 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.380454\n", + "2024-12-14 17:20:13.501 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/.[0-1477163).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/.[0-1477163).parquet_cache/locks/2024-12-14T17:20:13.120700.json\n", + "2024-12-14 17:20:13.504 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication.\n", + "2024-12-14 17:20:13.504 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet to determine row count.\n", + "2024-12-14 17:20:13.506 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.507 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.507 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dosage', 'drugivadmixture', 'drugname', 'drugordertimestamp', 'drugstarttimestamp', 'drugstoptimestamp', 'frequency', 'gtc', 'loadingdose', 'medicationid', 'patienthealthsystemstayid', 'prn', 'routeadmin']\n", + "2024-12-14 17:20:13.507 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dosage, drugivadmixture, drugname, drugordertimestamp, drugstarttimestamp, drugstoptimestamp, frequency, gtc, loadingdose, medicationid, patienthealthsystemstayid, prn, routeadmin\n", + "2024-12-14 17:20:13.508 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 75604 rows from 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet.\n", + "2024-12-14 17:20:13.508 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:13.508 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet row-chunk [0-75604) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet.\n", + "2024-12-14 17:20:13.510 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.510654. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:13.511 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet\n", + "2024-12-14 17:20:13.511 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.511 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.511 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dosage', 'drugivadmixture', 'drugname', 'drugordertimestamp', 'drugstarttimestamp', 'drugstoptimestamp', 'frequency', 'gtc', 'loadingdose', 'medicationid', 'patienthealthsystemstayid', 'prn', 'routeadmin']\n", + "2024-12-14 17:20:13.511 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dosage, drugivadmixture, drugname, drugordertimestamp, drugstarttimestamp, 
drugstoptimestamp, frequency, gtc, loadingdose, medicationid, patienthealthsystemstayid, prn, routeadmin\n", + "2024-12-14 17:20:13.511 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:13.511 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", + "2024-12-14 17:20:13.548 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.037873\n", + "2024-12-14 17:20:13.548 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/.[0-75604).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/.[0-75604).parquet_cache/locks/2024-12-14T17:20:13.510654.json\n", + "2024-12-14 17:20:13.551 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal.\n", + "2024-12-14 17:20:13.551 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet to determine row count.\n", + "2024-12-14 17:20:13.553 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.553 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.553 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: 
['carePlanGoalEnteredTimestamp', 'cplgoalcategory', 'cplgoalstatus', 'cplgoalvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:13.553 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanGoalEnteredTimestamp, cplgoalcategory, cplgoalstatus, cplgoalvalue, patienthealthsystemstayid\n", + "2024-12-14 17:20:13.554 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 3633 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet.\n", + "2024-12-14 17:20:13.554 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:13.554 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet row-chunk [0-3633) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet.\n", + "2024-12-14 17:20:13.557 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.557013. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:13.557 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet\n", + "2024-12-14 17:20:13.557 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.557 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.557 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanGoalEnteredTimestamp', 'cplgoalcategory', 'cplgoalstatus', 'cplgoalvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:13.557 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanGoalEnteredTimestamp, cplgoalcategory, cplgoalstatus, cplgoalvalue, patienthealthsystemstayid\n", + "2024-12-14 17:20:13.557 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:13.557 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:13.560 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003061\n", + "2024-12-14 17:20:13.560 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/.[0-3633).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/.[0-3633).parquet_cache/locks/2024-12-14T17:20:13.557013.json\n", + "2024-12-14 17:20:13.562 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease.\n", + "2024-12-14 17:20:13.562 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet to determine row count.\n", + "2024-12-14 17:20:13.564 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.564 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.564 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanInfectDiseaseEnteredTimestamp', 'infectdiseaseassessment', 'infectdiseasesite', 'patienthealthsystemstayid', 'responsetotherapy', 'treatment']\n", + "2024-12-14 17:20:13.564 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanInfectDiseaseEnteredTimestamp, infectdiseaseassessment, infectdiseasesite, patienthealthsystemstayid, responsetotherapy, treatment\n", + "2024-12-14 17:20:13.565 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 112 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet.\n", + "2024-12-14 17:20:13.565 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:13.565 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet row-chunk [0-112) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet.\n", + "2024-12-14 17:20:13.567 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.567600. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:13.567 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet\n", + "2024-12-14 17:20:13.567 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.567 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.568 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanInfectDiseaseEnteredTimestamp', 'infectdiseaseassessment', 'infectdiseasesite', 'patienthealthsystemstayid', 'responsetotherapy', 'treatment']\n", + "2024-12-14 17:20:13.568 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanInfectDiseaseEnteredTimestamp, infectdiseaseassessment, infectdiseasesite, patienthealthsystemstayid, responsetotherapy, treatment\n", + "2024-12-14 17:20:13.568 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + 
"2024-12-14 17:20:13.568 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:13.569 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.001941\n", + "2024-12-14 17:20:13.569 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/.[0-112).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/.[0-112).parquet_cache/locks/2024-12-14T17:20:13.567600.json\n", + "2024-12-14 17:20:13.571 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic.\n", + "2024-12-14 17:20:13.571 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet to determine row count.\n", + "2024-12-14 17:20:13.574 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.574 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.574 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cvp', 'etco2', 'heartrate', 'icp', 'observationEnteredTimestamp', 'padiastolic', 'pamean', 'pasystolic', 'patienthealthsystemstayid', 'respiration', 'sao2', 'st1', 
'st2', 'st3', 'systemicdiastolic', 'systemicmean', 'systemicsystolic', 'temperature', 'vitalperiodicid']\n", + "2024-12-14 17:20:13.574 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cvp, etco2, heartrate, icp, observationEnteredTimestamp, padiastolic, pamean, pasystolic, patienthealthsystemstayid, respiration, sao2, st1, st2, st3, systemicdiastolic, systemicmean, systemicsystolic, temperature, vitalperiodicid\n", + "2024-12-14 17:20:13.576 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1634960 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet.\n", + "2024-12-14 17:20:13.576 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:13.576 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet row-chunk [0-1634960) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet.\n", + "2024-12-14 17:20:13.579 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.578964. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:13.579 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet\n", + "2024-12-14 17:20:13.579 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.579 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.579 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cvp', 'etco2', 'heartrate', 'icp', 'observationEnteredTimestamp', 'padiastolic', 'pamean', 'pasystolic', 'patienthealthsystemstayid', 'respiration', 'sao2', 'st1', 'st2', 'st3', 'systemicdiastolic', 'systemicmean', 'systemicsystolic', 'temperature', 'vitalperiodicid']\n", + "2024-12-14 17:20:13.579 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cvp, etco2, heartrate, icp, observationEnteredTimestamp, padiastolic, pamean, pasystolic, patienthealthsystemstayid, respiration, sao2, st1, st2, st3, systemicdiastolic, systemicmean, systemicsystolic, temperature, vitalperiodicid\n", + "2024-12-14 17:20:13.579 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:13.579 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:13.942 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.363615\n", + "2024-12-14 17:20:13.942 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/.[0-1634960).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/.[0-1634960).parquet_cache/locks/2024-12-14T17:20:13.578964.json\n", + "2024-12-14 17:20:13.945 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient.\n", + "2024-12-14 17:20:13.945 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet to determine row count.\n", + "2024-12-14 17:20:13.947 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.947 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.947 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dateofbirth', 'ethnicity', 'gender', 'hospitaladmitsource', 'hospitaladmittimestamp', 'hospitaldischargelocation', 'hospitaldischargestatus', 'hospitaldischargetimestamp', 'hospitalid', 'hospitalnumbedscategory', 'hospitalregion', 'hospitalteachingstatus', 'patienthealthsystemstayid', 'patientunitstayid', 'uniquepid', 'unitadmissionheight', 'unitadmissionweight', 'unitadmitsource', 'unitadmittimestamp', 'unitdischargelocation', 'unitdischargestatus', 'unitdischargetimestamp', 'unitdischargeweight', 'unitstaytype', 'wardid']\n", + "2024-12-14 17:20:13.948 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, 
dateofbirth, ethnicity, gender, hospitaladmitsource, hospitaladmittimestamp, hospitaldischargelocation, hospitaldischargestatus, hospitaldischargetimestamp, hospitalid, hospitalnumbedscategory, hospitalregion, hospitalteachingstatus, patienthealthsystemstayid, patientunitstayid, uniquepid, unitadmissionheight, unitadmissionweight, unitadmitsource, unitadmittimestamp, unitdischargelocation, unitdischargestatus, unitdischargetimestamp, unitdischargeweight, unitstaytype, wardid\n", + "2024-12-14 17:20:13.948 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 2520 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet.\n", + "2024-12-14 17:20:13.948 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:13.948 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet row-chunk [0-2520) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet.\n", + "2024-12-14 17:20:13.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.950962. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:13.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet\n", + "2024-12-14 17:20:13.951 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.951 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.951 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dateofbirth', 'ethnicity', 'gender', 'hospitaladmitsource', 'hospitaladmittimestamp', 'hospitaldischargelocation', 'hospitaldischargestatus', 'hospitaldischargetimestamp', 'hospitalid', 'hospitalnumbedscategory', 'hospitalregion', 'hospitalteachingstatus', 'patienthealthsystemstayid', 'patientunitstayid', 'uniquepid', 'unitadmissionheight', 'unitadmissionweight', 'unitadmitsource', 'unitadmittimestamp', 'unitdischargelocation', 'unitdischargestatus', 'unitdischargetimestamp', 'unitdischargeweight', 'unitstaytype', 'wardid']\n", + "2024-12-14 17:20:13.951 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dateofbirth, ethnicity, gender, hospitaladmitsource, hospitaladmittimestamp, hospitaldischargelocation, hospitaldischargestatus, hospitaldischargetimestamp, hospitalid, hospitalnumbedscategory, hospitalregion, hospitalteachingstatus, patienthealthsystemstayid, patientunitstayid, uniquepid, unitadmissionheight, unitadmissionweight, unitadmitsource, unitadmittimestamp, unitdischargelocation, unitdischargestatus, unitdischargetimestamp, unitdischargeweight, unitstaytype, wardid\n", + "2024-12-14 17:20:13.951 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:13.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", + "2024-12-14 17:20:13.961 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010891\n", + "2024-12-14 17:20:13.961 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/.[0-2520).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/.[0-2520).parquet_cache/locks/2024-12-14T17:20:13.950962.json\n", + "2024-12-14 17:20:13.964 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab.\n", + "2024-12-14 17:20:13.964 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet to determine row count.\n", + "2024-12-14 17:20:13.966 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.967 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.967 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['labResultDrawnTimestamp', 'labmeasurenameinterface', 'labmeasurenamesystem', 'labname', 'labresult', 'labresulttext', 'labtypeid', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:13.967 
| DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, labResultDrawnTimestamp, labmeasurenameinterface, labmeasurenamesystem, labname, labresult, labresulttext, labtypeid, patienthealthsystemstayid\n", + "2024-12-14 17:20:13.969 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 434660 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet.\n", + "2024-12-14 17:20:13.969 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:13.969 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet row-chunk [0-434660) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet.\n", + "2024-12-14 17:20:13.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.972482. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:13.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet\n", + "2024-12-14 17:20:13.972 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:13.972 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:13.972 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['labResultDrawnTimestamp', 'labmeasurenameinterface', 'labmeasurenamesystem', 'labname', 'labresult', 'labresulttext', 'labtypeid', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:13.973 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, labResultDrawnTimestamp, labmeasurenameinterface, labmeasurenamesystem, labname, labresult, labresulttext, labtypeid, patienthealthsystemstayid\n", + "2024-12-14 17:20:13.973 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:13.973 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", + "2024-12-14 17:20:14.058 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.085536\n", + "2024-12-14 17:20:14.058 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/.[0-434660).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/.[0-434660).parquet_cache/locks/2024-12-14T17:20:13.972482.json\n", + "2024-12-14 17:20:14.061 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment.\n", + "2024-12-14 17:20:14.061 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet to determine row count.\n", + "2024-12-14 17:20:14.063 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.063 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.063 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'treatmentEnteredTimestamp', 'treatmentid', 'treatmentstring']\n", + "2024-12-14 17:20:14.063 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, treatmentEnteredTimestamp, treatmentid, treatmentstring\n", + "2024-12-14 17:20:14.064 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 38290 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet.\n", + "2024-12-14 17:20:14.064 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.064 | INFO | 
MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet row-chunk [0-38290) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet.\n", + "2024-12-14 17:20:14.066 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.066637. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.066 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet\n", + "2024-12-14 17:20:14.067 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.067 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.067 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'treatmentEnteredTimestamp', 'treatmentid', 'treatmentstring']\n", + "2024-12-14 17:20:14.067 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, treatmentEnteredTimestamp, treatmentid, treatmentstring\n", + "2024-12-14 17:20:14.067 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.067 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:14.076 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009360\n", + "2024-12-14 17:20:14.076 | 
INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/.[0-38290).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/.[0-38290).parquet_cache/locks/2024-12-14T17:20:14.066637.json\n", + "2024-12-14 17:20:14.078 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment.\n", + "2024-12-14 17:20:14.079 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet to determine row count.\n", + "2024-12-14 17:20:14.080 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.081 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.081 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cellattribute', 'cellattributevalue', 'celllabel', 'nurseAssessEnteredTimestamp', 'nurseAssessPerformedTimestamp', 'nurseassessid', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.081 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cellattribute, cellattributevalue, celllabel, nurseAssessEnteredTimestamp, nurseAssessPerformedTimestamp, nurseassessid, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.081 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 91589 rows from 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet.\n", + "2024-12-14 17:20:14.081 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.081 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet row-chunk [0-91589) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet.\n", + "2024-12-14 17:20:14.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.084291. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet\n", + "2024-12-14 17:20:14.084 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.084 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.084 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cellattribute', 'cellattributevalue', 'celllabel', 'nurseAssessEnteredTimestamp', 'nurseAssessPerformedTimestamp', 'nurseassessid', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.084 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cellattribute, cellattributevalue, celllabel, nurseAssessEnteredTimestamp, 
nurseAssessPerformedTimestamp, nurseassessid, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.085 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:14.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.021893\n", + "2024-12-14 17:20:14.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/.[0-91589).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/.[0-91589).parquet_cache/locks/2024-12-14T17:20:14.084291.json\n", + "2024-12-14 17:20:14.108 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam.\n", + "2024-12-14 17:20:14.108 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet to determine row count.\n", + "2024-12-14 17:20:14.110 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.110 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.111 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: 
['patienthealthsystemstayid', 'physicalExamEnteredTimestamp', 'physicalexamid', 'physicalexampath', 'physicalexamtext', 'physicalexamvalue']\n", + "2024-12-14 17:20:14.111 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, physicalExamEnteredTimestamp, physicalexamid, physicalexampath, physicalexamtext, physicalexamvalue\n", + "2024-12-14 17:20:14.111 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 84058 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet.\n", + "2024-12-14 17:20:14.111 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.111 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet row-chunk [0-84058) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet.\n", + "2024-12-14 17:20:14.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.113975. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet\n", + "2024-12-14 17:20:14.114 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.114 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.114 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'physicalExamEnteredTimestamp', 'physicalexamid', 'physicalexampath', 'physicalexamtext', 'physicalexamvalue']\n", + "2024-12-14 17:20:14.114 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, physicalExamEnteredTimestamp, physicalexamid, physicalexampath, physicalexamtext, physicalexamvalue\n", + "2024-12-14 17:20:14.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:14.139 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.025145\n", + "2024-12-14 17:20:14.139 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/.[0-84058).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/.[0-84058).parquet_cache/locks/2024-12-14T17:20:14.113975.json\n", + "2024-12-14 17:20:14.141 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx.\n", + "2024-12-14 17:20:14.141 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet to determine row count.\n", + "2024-12-14 17:20:14.143 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.144 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.144 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admissiondxid', 'admitDxEnteredTimestamp', 'admitdxname', 'patienthealthsystemstayid', 'patientunitstayid']\n", + "2024-12-14 17:20:14.144 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admissiondxid, admitDxEnteredTimestamp, admitdxname, patienthealthsystemstayid, patientunitstayid\n", + "2024-12-14 17:20:14.144 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 7578 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet.\n", + "2024-12-14 17:20:14.144 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 
17:20:14.144 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet row-chunk [0-7578) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet.\n", + "2024-12-14 17:20:14.146 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.146860. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.147 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet\n", + "2024-12-14 17:20:14.147 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.147 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.147 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admissiondxid', 'admitDxEnteredTimestamp', 'admitdxname', 'patienthealthsystemstayid', 'patientunitstayid']\n", + "2024-12-14 17:20:14.147 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admissiondxid, admitDxEnteredTimestamp, admitdxname, patienthealthsystemstayid, patientunitstayid\n", + "2024-12-14 17:20:14.147 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.147 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:14.150 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - 
Succeeded in 0:00:00.003467\n", + "2024-12-14 17:20:14.150 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/.[0-7578).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/.[0-7578).parquet_cache/locks/2024-12-14T17:20:14.146860.json\n", + "2024-12-14 17:20:14.152 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis.\n", + "2024-12-14 17:20:14.152 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet to determine row count.\n", + "2024-12-14 17:20:14.155 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.155 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.155 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['diagnosisEnteredTimestamp', 'diagnosispriority', 'diagnosisstring', 'icd9code', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.155 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, diagnosisEnteredTimestamp, diagnosispriority, diagnosisstring, icd9code, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.155 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 24978 rows from 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet.\n", + "2024-12-14 17:20:14.155 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.155 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet row-chunk [0-24978) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet.\n", + "2024-12-14 17:20:14.158 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.158055. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.158 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet\n", + "2024-12-14 17:20:14.158 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.158 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.158 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['diagnosisEnteredTimestamp', 'diagnosispriority', 'diagnosisstring', 'icd9code', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.158 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, diagnosisEnteredTimestamp, diagnosispriority, diagnosisstring, icd9code, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.158 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 
- Read dataset\n", + "2024-12-14 17:20:14.158 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:14.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006441\n", + "2024-12-14 17:20:14.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/.[0-24978).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/.[0-24978).parquet_cache/locks/2024-12-14T17:20:14.158055.json\n", + "2024-12-14 17:20:14.166 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting.\n", + "2024-12-14 17:20:14.167 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet to determine row count.\n", + "2024-12-14 17:20:14.169 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.169 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.169 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'respChartEnteredTimestamp', 'respChartPerformedTimestamp', 'respchartid', 'respcharttypecat', 'respchartvalue', 
'respchartvaluelabel']\n", + "2024-12-14 17:20:14.169 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, respChartEnteredTimestamp, respChartPerformedTimestamp, respchartid, respcharttypecat, respchartvalue, respchartvaluelabel\n", + "2024-12-14 17:20:14.170 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 176089 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet.\n", + "2024-12-14 17:20:14.170 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.170 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet row-chunk [0-176089) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet.\n", + "2024-12-14 17:20:14.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.172792. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.173 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet\n", + "2024-12-14 17:20:14.173 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.173 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.173 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'respChartEnteredTimestamp', 'respChartPerformedTimestamp', 'respchartid', 'respcharttypecat', 'respchartvalue', 'respchartvaluelabel']\n", + "2024-12-14 17:20:14.173 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, respChartEnteredTimestamp, respChartPerformedTimestamp, respchartid, respcharttypecat, respchartvalue, respchartvaluelabel\n", + "2024-12-14 17:20:14.173 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.173 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:14.217 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.044580\n", + "2024-12-14 17:20:14.217 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/.[0-176089).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/.[0-176089).parquet_cache/locks/2024-12-14T17:20:14.172792.json\n", + "2024-12-14 17:20:14.220 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare.\n", + "2024-12-14 17:20:14.220 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet to determine row count.\n", + "2024-12-14 17:20:14.222 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.222 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.222 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cellattribute', 'cellattributevalue', 'celllabel', 'nurseCareEnteredTimestamp', 'nurseCarePerformedTimestamp', 'nursecareid', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.222 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cellattribute, cellattributevalue, celllabel, nurseCareEnteredTimestamp, nurseCarePerformedTimestamp, nursecareid, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.223 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 42080 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet.\n", + "2024-12-14 17:20:14.223 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.223 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet row-chunk [0-42080) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet.\n", + "2024-12-14 17:20:14.225 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.225610. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.225 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet\n", + "2024-12-14 17:20:14.225 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.226 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.226 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cellattribute', 'cellattributevalue', 'celllabel', 'nurseCareEnteredTimestamp', 'nurseCarePerformedTimestamp', 'nursecareid', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.226 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cellattribute, cellattributevalue, celllabel, nurseCareEnteredTimestamp, nurseCarePerformedTimestamp, nursecareid, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.226 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.226 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:14.237 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012115\n", + "2024-12-14 17:20:14.237 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/.[0-42080).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/.[0-42080).parquet_cache/locks/2024-12-14T17:20:14.225610.json\n", + "2024-12-14 17:20:14.240 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral.\n", + "2024-12-14 17:20:14.240 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet to determine row count.\n", + "2024-12-14 17:20:14.242 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.242 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.242 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanGeneralItemEnteredTimestamp', 'cplgroup', 'cplitemvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.242 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, 
carePlanGeneralItemEnteredTimestamp, cplgroup, cplitemvalue, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.243 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 33148 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet.\n", + "2024-12-14 17:20:14.243 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.243 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet row-chunk [0-33148) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet.\n", + "2024-12-14 17:20:14.246 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.245975. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.246 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet\n", + "2024-12-14 17:20:14.246 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanGeneralItemEnteredTimestamp', 'cplgroup', 'cplitemvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanGeneralItemEnteredTimestamp, cplgroup, cplitemvalue, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.246 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.246 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:14.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006840\n", + "2024-12-14 17:20:14.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/.[0-33148).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/.[0-33148).parquet_cache/locks/2024-12-14T17:20:14.245975.json\n", + 
"2024-12-14 17:20:14.255 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory.\n", + "2024-12-14 17:20:14.255 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet to determine row count.\n", + "2024-12-14 17:20:14.257 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.257 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.257 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['pastHistoryEnteredTimestamp', 'pastHistoryTakenTimestamp', 'pasthistoryid', 'pasthistorynotetype', 'pasthistorypath', 'pasthistoryvalue', 'pasthistoryvaluetext', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.258 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, pastHistoryEnteredTimestamp, pastHistoryTakenTimestamp, pasthistoryid, pasthistorynotetype, pasthistorypath, pasthistoryvalue, pasthistoryvaluetext, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.258 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 12109 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet.\n", + "2024-12-14 17:20:14.258 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 
17:20:14.258 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet row-chunk [0-12109) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet.\n", + "2024-12-14 17:20:14.260 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.260683. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.260 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet\n", + "2024-12-14 17:20:14.261 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.261 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.261 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['pastHistoryEnteredTimestamp', 'pastHistoryTakenTimestamp', 'pasthistoryid', 'pasthistorynotetype', 'pasthistorypath', 'pasthistoryvalue', 'pasthistoryvaluetext', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.261 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, pastHistoryEnteredTimestamp, pastHistoryTakenTimestamp, pasthistoryid, pasthistorynotetype, pasthistorypath, pasthistoryvalue, pasthistoryvaluetext, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.261 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.261 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:14.268 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007372\n", + "2024-12-14 17:20:14.268 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/.[0-12109).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/.[0-12109).parquet_cache/locks/2024-12-14T17:20:14.260683.json\n", + "2024-12-14 17:20:14.270 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic.\n", + "2024-12-14 17:20:14.270 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet to determine row count.\n", + "2024-12-14 17:20:14.273 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.273 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.273 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cardiacinput', 'cardiacoutput', 'noninvasivediastolic', 'noninvasivemean', 'noninvasivesystolic', 'observationEnteredTimestamp', 'paop', 'patienthealthsystemstayid', 'pvr', 'pvri', 'svr', 'svri', 'vitalaperiodicid']\n", + "2024-12-14 17:20:14.273 | DEBUG | 
MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cardiacinput, cardiacoutput, noninvasivediastolic, noninvasivemean, noninvasivesystolic, observationEnteredTimestamp, paop, patienthealthsystemstayid, pvr, pvri, svr, svri, vitalaperiodicid\n", + "2024-12-14 17:20:14.274 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 274088 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet.\n", + "2024-12-14 17:20:14.274 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.274 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet row-chunk [0-274088) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet.\n", + "2024-12-14 17:20:14.276 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.276629. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.276 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet\n", + "2024-12-14 17:20:14.276 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.277 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.277 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cardiacinput', 'cardiacoutput', 'noninvasivediastolic', 'noninvasivemean', 'noninvasivesystolic', 'observationEnteredTimestamp', 'paop', 'patienthealthsystemstayid', 'pvr', 'pvri', 'svr', 'svri', 'vitalaperiodicid']\n", + "2024-12-14 17:20:14.277 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cardiacinput, cardiacoutput, noninvasivediastolic, noninvasivemean, noninvasivesystolic, observationEnteredTimestamp, paop, patienthealthsystemstayid, pvr, pvri, svr, svri, vitalaperiodicid\n", + "2024-12-14 17:20:14.277 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.277 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:14.323 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.046469\n", + "2024-12-14 17:20:14.323 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/.[0-274088).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/.[0-274088).parquet_cache/locks/2024-12-14T17:20:14.276629.json\n", + "2024-12-14 17:20:14.326 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy.\n", + "2024-12-14 17:20:14.326 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet to determine row count.\n", + "2024-12-14 17:20:14.328 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.328 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.328 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['allergyEnteredTimestamp', 'allergyname', 'allergytype', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.328 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, allergyEnteredTimestamp, allergyname, allergytype, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.329 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 2475 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet.\n", + "2024-12-14 17:20:14.329 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.329 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet row-chunk [0-2475) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet.\n", + "2024-12-14 17:20:14.331 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.331616. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.331 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet\n", + "2024-12-14 17:20:14.331 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.332 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.332 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['allergyEnteredTimestamp', 'allergyname', 'allergytype', 'patienthealthsystemstayid']\n", + "2024-12-14 17:20:14.332 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, allergyEnteredTimestamp, allergyname, allergytype, patienthealthsystemstayid\n", + "2024-12-14 17:20:14.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", + 
"2024-12-14 17:20:14.334 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.002488\n", + "2024-12-14 17:20:14.334 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/.[0-2475).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/.[0-2475).parquet_cache/locks/2024-12-14T17:20:14.331616.json\n", + "2024-12-14 17:20:14.336 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare.\n", + "2024-12-14 17:20:14.336 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet to determine row count.\n", + "2024-12-14 17:20:14.338 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.338 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.338 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['airwayposition', 'airwaysize', 'airwaytype', 'cpaplimit', 'cuffpressure', 'hiexhmvlimit', 'highironoxlimit', 'hipeakpreslimit', 'hirespratelimit', 'lowexhmvlimit', 'lowexhtvlimit', 'lowironoxlimit', 'lowpeakpreslimit', 'lowrespratelimit', 'meanairwaypreslimit', 'patienthealthsystemstayid', 'peeplimit', 'respCareStatusEnteredTimestamp', 'respcareid', 'setapneafio2', 'setapneaie', 'setapneainsptime', 'setapneainterval', 
'setapneaippeephigh', 'setapneapeakflow', 'setapnearr', 'setapneatv', 'sighpreslimit', 'ventEndTimestamp', 'ventStartTimestamp']\n", + "2024-12-14 17:20:14.339 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, airwayposition, airwaysize, airwaytype, cpaplimit, cuffpressure, hiexhmvlimit, highironoxlimit, hipeakpreslimit, hirespratelimit, lowexhmvlimit, lowexhtvlimit, lowironoxlimit, lowpeakpreslimit, lowrespratelimit, meanairwaypreslimit, patienthealthsystemstayid, peeplimit, respCareStatusEnteredTimestamp, respcareid, setapneafio2, setapneaie, setapneainsptime, setapneainterval, setapneaippeephigh, setapneapeakflow, setapnearr, setapneatv, sighpreslimit, ventEndTimestamp, ventStartTimestamp\n", + "2024-12-14 17:20:14.339 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 5436 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet.\n", + "2024-12-14 17:20:14.339 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 17:20:14.339 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet row-chunk [0-5436) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet.\n", + "2024-12-14 17:20:14.341 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.341778. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:14.342 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet\n", + "2024-12-14 17:20:14.342 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 17:20:14.342 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 17:20:14.342 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['airwayposition', 'airwaysize', 'airwaytype', 'cpaplimit', 'cuffpressure', 'hiexhmvlimit', 'highironoxlimit', 'hipeakpreslimit', 'hirespratelimit', 'lowexhmvlimit', 'lowexhtvlimit', 'lowironoxlimit', 'lowpeakpreslimit', 'lowrespratelimit', 'meanairwaypreslimit', 'patienthealthsystemstayid', 'peeplimit', 'respCareStatusEnteredTimestamp', 'respcareid', 'setapneafio2', 'setapneaie', 'setapneainsptime', 'setapneainterval', 'setapneaippeephigh', 'setapneapeakflow', 'setapnearr', 'setapneatv', 'sighpreslimit', 'ventEndTimestamp', 'ventStartTimestamp']\n", + "2024-12-14 17:20:14.342 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, airwayposition, airwaysize, airwaytype, cpaplimit, cuffpressure, hiexhmvlimit, highironoxlimit, hipeakpreslimit, hirespratelimit, lowexhmvlimit, lowexhtvlimit, lowironoxlimit, lowpeakpreslimit, lowrespratelimit, meanairwaypreslimit, patienthealthsystemstayid, peeplimit, respCareStatusEnteredTimestamp, respcareid, setapneafio2, setapneaie, setapneainsptime, setapneainterval, setapneaippeephigh, setapneapeakflow, setapnearr, setapneatv, sighpreslimit, ventEndTimestamp, ventStartTimestamp\n", + "2024-12-14 
17:20:14.342 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:14.342 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:14.357 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016024\n", + "2024-12-14 17:20:14.357 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/.[0-5436).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/.[0-5436).parquet_cache/locks/2024-12-14T17:20:14.341778.json\n", + "2024-12-14 17:20:14.358 | INFO | MEDS_transforms.extract.shard_events:main:430 - Sub-sharding completed in 0:00:01.286791\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:14.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: split_and_shard_subjects\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:14.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-split_and_shard_subjects --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=split_and_shard_subjects\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:15.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:15.172\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:20:14.978 | INFO | MEDS_transforms.utils:stage_init:73 - Running split_and_shard_subjects with the following configuration:\n", + "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", + "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- shard_events\n", + "- split_and_shard_subjects\n", + "- convert_to_sharded_events\n", + "- merge_to_MEDS_cohort\n", + "- finalize_MEDS_metadata\n", + "- finalize_MEDS_data\n", + "stage_configs:\n", + " shard_events:\n", + " row_chunksize: 200000000\n", + " infer_schema_length: 999999999\n", + " data_input_dir: ${input_dir}\n", + " split_and_shard_subjects:\n", + " is_metadata: true\n", + " output_dir: ${cohort_dir}/metadata\n", + " n_subjects_per_shard: 10000\n", + " external_splits_json_fp: null\n", + " split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + " convert_to_sharded_events:\n", + " do_dedup_text_and_numeric: true\n", + " merge_to_MEDS_cohort:\n", + " unique_by: null\n", + " additional_sort_by: null\n", + " extract_code_metadata:\n", + " is_metadata: true\n", + " description_separator: '\n", + "\n", + " '\n", + " finalize_MEDS_metadata:\n", + " is_metadata: true\n", + " do_retype: true\n", + " finalize_MEDS_data:\n", + " do_retype: true\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: split_and_shard_subjects\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: eICU\n", + " dataset_version: 2.0\n", + " package_name: ${get_package_name:}\n", 
+ " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: extract\n", + "description: \"This pipeline extracts the eICU dataset in longitudinal, sparse form\\\n", + " \\ from an input dataset meeting\\nselect criteria and converts them to the flattened,\\\n", + " \\ MEDS format. You can control the key arguments to this\\npipeline by setting environment\\\n", + " \\ variables:\\n```bash\\n export EVENT_CONVERSION_CONFIG_FP=# Path to your event\\\n", + " \\ conversion config\\n export EICU_PRE_MEDS_DIR=# Path to the output dir of the\\\n", + " \\ pre-MEDS step\\n export EICU_MEDS_COHORT_DIR=# Path to where you want the dataset\\\n", + " \\ to live\\n```\"\n", + "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", + "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", + "\n", + "2024-12-14 17:20:14.991 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "is_metadata: true\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo//meds//metadata\n", + "n_subjects_per_shard: 10000\n", + "external_splits_json_fp: null\n", + "split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", + "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/metadata\n", + "reducer_output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "train_only: true\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", + " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata\n", + " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/metadata\n", + "2024-12-14 17:20:14.992 | INFO | 
MEDS_transforms.extract.split_and_shard_subjects:main:215 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/event_configs.yaml (needed for subject ID columns)\n", + "2024-12-14 17:20:15.041 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:219 - Event conversion config:\n", + "subject_id_col: patienthealthsystemstayid\n", + "patient:\n", + " dob:\n", + " code: MEDS_BIRTH\n", + " time: col(dateofbirth)\n", + " uniquepid: uniquepid\n", + " gender:\n", + " code:\n", + " - GENDER\n", + " - col(gender)\n", + " time: null\n", + " ethnicity:\n", + " code:\n", + " - ETHNICITY\n", + " - col(ethnicity)\n", + " time: null\n", + " hosp_admission:\n", + " code:\n", + " - HOSPITAL_ADMISSION\n", + " - col(hospitaladmitsource)\n", + " - col(hospitalregion)\n", + " - col(hospitalteachingstatus)\n", + " - col(hospitalnumbedscategory)\n", + " time: col(hospitaladmittimestamp)\n", + " hospital_id: hospitalid\n", + " hosp_discharge:\n", + " code:\n", + " - HOSPITAL_DISCHARGE\n", + " - col(hospitaldischargestatus)\n", + " - col(hospitaldischargelocation)\n", + " time: col(hospitaldischargetimestamp)\n", + " unit_admission:\n", + " code:\n", + " - UNIT_ADMISSION\n", + " - col(unitadmitsource)\n", + " - col(unitstaytype)\n", + " time: col(unitadmittimestamp)\n", + " ward_id: wardid\n", + " unit_stay_id: patientunitstayid\n", + " unit_admission_weight:\n", + " code:\n", + " - UNIT_ADMISSION_WEIGHT\n", + " time: col(unitadmittimestamp)\n", + " numeric_value: unitadmissionweight\n", + " unit_admission_height:\n", + " code:\n", + " - UNIT_ADMISSION_HEIGHT\n", + " time: col(unitadmittimestamp)\n", + " numeric_value: unitadmissionheight\n", + " unit_discharge:\n", + " code:\n", + " - UNIT_DISCHARGE\n", + " - col(unitdischargestatus)\n", + " - col(unitdischargelocation)\n", + " time: col(unitdischargetimestamp)\n", + " unit_discharge_weight:\n", + " code:\n", + " - UNIT_DISCHARGE_WEIGHT\n", + " time: 
col(unitdischargetimestamp)\n", + " numeric_value: unitdischargeweight\n", + "admissionDx:\n", + " admission_diagnosis:\n", + " code:\n", + " - ADMISSION_DX\n", + " - col(admitdxname)\n", + " time: col(admitDxEnteredTimestamp)\n", + " admission_dx_id: admissiondxid\n", + " unit_stay_id: patientunitstayid\n", + "allergy:\n", + " allergy:\n", + " code:\n", + " - ALLERGY\n", + " - col(allergytype)\n", + " - col(allergyname)\n", + " time: col(allergyEnteredTimestamp)\n", + "carePlanGeneral:\n", + " cplItem:\n", + " code:\n", + " - CAREPLAN_GENERAL\n", + " - col(cplgroup)\n", + " - col(cplitemvalue)\n", + " time: col(carePlanGeneralItemEnteredTimestamp)\n", + "carePlanEOL:\n", + " cplEolDiscussion:\n", + " code:\n", + " - CAREPLAN_EOL\n", + " time: col(carePlanEolDiscussionOccurredTimestamp)\n", + "carePlanGoal:\n", + " cplGoal:\n", + " code:\n", + " - CAREPLAN_GOAL\n", + " - col(cplgoalcategory)\n", + " - col(cplgoalvalue)\n", + " - col(cplgoalstatus)\n", + " time: col(carePlanGoalEnteredTimestamp)\n", + "carePlanInfectiousDisease:\n", + " cplInfectDisease:\n", + " code:\n", + " - CAREPLAN_INFECTIOUS_DISEASE\n", + " - col(infectdiseasesite)\n", + " - col(infectdiseaseassessment)\n", + " - col(treatment)\n", + " - col(responsetotherapy)\n", + " time: col(carePlanInfectDiseaseEnteredTimestamp)\n", + "diagnosis:\n", + " diagnosis:\n", + " code:\n", + " - ICD9CM\n", + " - col(icd9code)\n", + " - col(diagnosispriority)\n", + " time: col(diagnosisEnteredTimestamp)\n", + " diagnosis_string: diagnosisstring\n", + "lab:\n", + " lab:\n", + " code:\n", + " - LAB\n", + " - col(labmeasurenamesystem)\n", + " - col(labmeasurenameinterface)\n", + " - col(labname)\n", + " time: col(labResultDrawnTimestamp)\n", + " numeric_value: labresult\n", + " text_value: labresulttext\n", + " lab_type_id: labtypeid\n", + "medication:\n", + " drug_ordered:\n", + " code:\n", + " - MEDICATION\n", + " - ORDERED\n", + " - col(drugname)\n", + " time: col(drugordertimestamp)\n", + " medication_id: 
medicationid\n", + " drug_iv_admixture: drugivadmixture\n", + " dosage: dosage\n", + " route_admin: routeadmin\n", + " frequency: frequency\n", + " loading_dose: loadingdose\n", + " prn: prn\n", + " gtc: gtc\n", + " drug_started:\n", + " code:\n", + " - MEDICATION\n", + " - STARTED\n", + " - col(drugname)\n", + " time: col(drugstarttimestamp)\n", + " medication_id: medicationid\n", + " drug_stopped:\n", + " code:\n", + " - MEDICATION\n", + " - STOPPED\n", + " - col(drugname)\n", + " time: col(drugstoptimestamp)\n", + " medication_id: medicationid\n", + "nurseAssessment:\n", + " nurse_assessment_performed:\n", + " code:\n", + " - NURSE_ASSESSMENT\n", + " - PERFORMED\n", + " - NOT YET DONE\n", + " time: col(nurseAssessPerformedTimestamp)\n", + " nurse_assessment_id: nurseassessid\n", + " cell_label: celllabel\n", + " cell_attribute: cellattribute\n", + " cell_attribute_value: cellattributevalue\n", + " nurse_assessment_entered:\n", + " code:\n", + " - NURSE_ASSESSMENT\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(nurseAssessEnteredTimestamp)\n", + " nurse_assessment_id: nurseassessid\n", + " cell_label: celllabel\n", + " cell_attribute: cellattribute\n", + " cell_attribute_value: cellattributevalue\n", + "nurseCare:\n", + " nurse_care_performed:\n", + " code:\n", + " - NURSE_CARE\n", + " - PERFORMED\n", + " - NOT YET DONE\n", + " time: col(nurseCarePerformedTimestamp)\n", + " nurse_care_id: nursecareid\n", + " cell_label: celllabel\n", + " cell_attribute: cellattribute\n", + " cell_attribute_value: cellattributevalue\n", + " nurse_care_entered:\n", + " code:\n", + " - NURSE_CARE\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(nurseCareEnteredTimestamp)\n", + " nurse_care_id: nursecareid\n", + " cell_label: celllabel\n", + " cell_attribute: cellattribute\n", + " cell_attribute_value: cellattributevalue\n", + "nurseCharting:\n", + " nurse_charting_performed:\n", + " code:\n", + " - NURSE_CHARTING\n", + " - PERFORMED\n", + " - NOT YET DONE\n", + 
" time: col(nursingChartPerformedTimestamp)\n", + " nurse_charting_id: nursingchartid\n", + " cell_type_cat: nursingchartcelltypecat\n", + " cell_type_val_name: nursingchartcelltypevalname\n", + " cell_type_val_label: nursingchartcelltypevallabel\n", + " cell_value: nursingchartvalue\n", + " nurse_charting_entered:\n", + " code:\n", + " - NURSE_CHARTING\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(nursingChartEnteredTimestamp)\n", + " nurse_charting_id: nursingchartid\n", + " cell_type_cat: nursingchartcelltypecat\n", + " cell_type_val_name: nursingchartcelltypevalname\n", + " cell_type_val_label: nursingchartcelltypevallabel\n", + " cell_value: nursingchartvalue\n", + "pastHistory:\n", + " past_history_taken:\n", + " code:\n", + " - PAST_HISTORY\n", + " - TAKEN\n", + " - NOT YET DONE\n", + " time: col(pastHistoryTakenTimestamp)\n", + " past_history_id: pasthistoryid\n", + " note_type: pasthistorynotetype\n", + " path: pasthistorypath\n", + " value: pasthistoryvalue\n", + " value_text: pasthistoryvaluetext\n", + " past_history_entered:\n", + " code:\n", + " - PAST_HISTORY\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(pastHistoryEnteredTimestamp)\n", + " past_history_id: pasthistoryid\n", + " note_type: pasthistorynotetype\n", + " path: pasthistorypath\n", + " value: pasthistoryvalue\n", + " value_text: pasthistoryvaluetext\n", + "physicalExam:\n", + " physical_exam_entered:\n", + " code:\n", + " - PHYSICAL_EXAM\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(physicalExamEnteredTimestamp)\n", + " physical_exam_id: physicalexamid\n", + " text: physicalexamtext\n", + " path: physicalexampath\n", + " value: physicalexamvalue\n", + "respiratoryCare:\n", + " resp_care_status:\n", + " code:\n", + " - RESP_CARE\n", + " - STATUS\n", + " - NOT YET DONE\n", + " time: col(respCareStatusEnteredTimestamp)\n", + " resp_care_id: respcareid\n", + " airwaytype: airwaytype\n", + " airwaysize: airwaysize\n", + " airwayposition: airwayposition\n", 
+ " cuffpressure: cuffpressure\n", + " lowexhmvlimit: lowexhmvlimit\n", + " hiexhmvlimit: hiexhmvlimit\n", + " lowexhtvlimit: lowexhtvlimit\n", + " hipeakpreslimit: hipeakpreslimit\n", + " lowpeakpreslimit: lowpeakpreslimit\n", + " hirespratelimit: hirespratelimit\n", + " lowrespratelimit: lowrespratelimit\n", + " sighpreslimit: sighpreslimit\n", + " lowironoxlimit: lowironoxlimit\n", + " highironoxlimit: highironoxlimit\n", + " meanairwaypreslimit: meanairwaypreslimit\n", + " peeplimit: peeplimit\n", + " cpaplimit: cpaplimit\n", + " setapneainterval: setapneainterval\n", + " setapneatv: setapneatv\n", + " setapneaippeephigh: setapneaippeephigh\n", + " setapnearr: setapnearr\n", + " setapneapeakflow: setapneapeakflow\n", + " setapneainsptime: setapneainsptime\n", + " setapneaie: setapneaie\n", + " setapneafio2: setapneafio2\n", + " vent_start:\n", + " code:\n", + " - VENT\n", + " - START\n", + " - NOT YET DONE\n", + " time: col(ventStartTimestamp)\n", + " resp_care_id: respcareid\n", + " vent_end:\n", + " code:\n", + " - VENT\n", + " - END\n", + " - NOT YET DONE\n", + " time: col(ventEndTimestamp)\n", + " resp_care_id: respcareid\n", + "respiratoryCharting:\n", + " resp_charting_performed:\n", + " code:\n", + " - RESP_CHARTING\n", + " - PERFORMED\n", + " - NOT YET DONE\n", + " time: col(respChartPerformedTimestamp)\n", + " resp_chart_id: respchartid\n", + " type_cat: respcharttypecat\n", + " value_label: respchartvaluelabel\n", + " value: respchartvalue\n", + " resp_charting_entered:\n", + " code:\n", + " - RESP_CHARTING\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(respChartEnteredTimestamp)\n", + " resp_chart_id: respchartid\n", + " type_cat: respcharttypecat\n", + " value_label: respchartvaluelabel\n", + " value: respchartvalue\n", + "treatment:\n", + " treatment:\n", + " code:\n", + " - TREATMENT\n", + " - ENTERED\n", + " - col(treatmentstring)\n", + " time: col(treatmentEnteredTimestamp)\n", + " treatment_id: treatmentid\n", + "vitalAperiodic:\n", 
+ " non_invasive_systolic:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - BP\n", + " - NONINVASIVE_SYSTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: noninvasivesystolic\n", + " non_invasive_diastolic:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - BP\n", + " - NONINVASIVE_DIASTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: noninvasivediastolic\n", + " non_invasive_mean:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - BP\n", + " - NONINVASIVE_MEAN\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: noninvasivemean\n", + " paop:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - PAOP\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: paop\n", + " cardiac_output:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - CARDIAC_OUTPUT\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: cardiacoutput\n", + " cardiac_input:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - CARDIAC_INPUT\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: cardiacinput\n", + " svr:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - SVR\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: svr\n", + " svri:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - SVRI\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: svri\n", + " pvr:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - PVR\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: pvr\n", + " pvri:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + 
" - PVRI\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: pvri\n", + "vitalPeriodic:\n", + " temperature:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - TEMPERATURE\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: temperature\n", + " saO2:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - SAO2\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: sao2\n", + " heartRate:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - HEARTRATE\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: heartrate\n", + " respiration:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - RESPIRATION\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: respiration\n", + " cvp:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - CVP\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: cvp\n", + " etCo2:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ETCO2\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: etco2\n", + " systemic_systolic:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - SYSTEMIC_SYSTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: systemicsystolic\n", + " systemic_diastolic:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - SYSTEMIC_DIASTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: systemicdiastolic\n", + " systemic_mean:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - SYSTEMIC_MEAN\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: 
vitalperiodicid\n", + " numeric_value: systemicmean\n", + " pa_systolic:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - PULM_ART_SYSTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: pasystolic\n", + " pa_diastolic:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - PULM_ART_DIASTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: padiastolic\n", + " pa_mean:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - PULM_ART_MEAN\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: pamean\n", + " st1:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ST1\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: st1\n", + " st2:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ST2\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: st2\n", + " st3:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ST3\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: st3\n", + " ICP:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ICP\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: icp\n", + "\n", + "2024-12-14 17:20:15.041 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from patient files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", + "2024-12-14 17:20:15.042 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from admissionDx files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", + 
"2024-12-14 17:20:15.043 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from allergy files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:15.043 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from carePlanGeneral files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:15.043 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from carePlanEOL files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:15.043 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from carePlanGoal files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:15.044 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from carePlanInfectiousDisease files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:15.044 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from diagnosis files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:15.044 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from lab files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", + "2024-12-14 17:20:15.044 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from medication files:\n", 
+ " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", + "2024-12-14 17:20:15.045 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from nurseAssessment files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:15.045 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from nurseCare files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:15.045 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from nurseCharting files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:15.045 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from pastHistory files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:15.045 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from physicalExam files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:15.046 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from respiratoryCare files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:15.046 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from respiratoryCharting files:\n", + " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:15.046 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from treatment files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:15.046 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from vitalAperiodic files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:15.047 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from vitalPeriodic files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:15.047 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:239 - Joining all subject IDs from 20 dataframes\n", + "2024-12-14 17:20:15.108 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:247 - Found 2174 unique subject IDs of type int64\n", + "2024-12-14 17:20:15.112 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:262 - Sharding and splitting subjects\n", + "2024-12-14 17:20:15.131 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split train/0 has 1087 subjects.\n", + "2024-12-14 17:20:15.131 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split tuning/0 has 544 subjects.\n", + "2024-12-14 17:20:15.131 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split held_out/0 has 543 subjects.\n", + "2024-12-14 17:20:15.132 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:273 - Writing sharded subjects to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/.shards.json\n", + 
"2024-12-14 17:20:15.132 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:276 - Done writing sharded subjects\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:15.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: convert_to_sharded_events\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:15.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-convert_to_sharded_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=convert_to_sharded_events\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:24.817\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:24.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:20:15.716 | INFO | MEDS_transforms.utils:stage_init:73 - Running convert_to_sharded_events with the following configuration:\n", + "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", + "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- shard_events\n", + "- split_and_shard_subjects\n", + "- convert_to_sharded_events\n", + "- merge_to_MEDS_cohort\n", + "- finalize_MEDS_metadata\n", + "- finalize_MEDS_data\n", + "stage_configs:\n", + " shard_events:\n", + " row_chunksize: 200000000\n", + " infer_schema_length: 999999999\n", + " data_input_dir: ${input_dir}\n", + " split_and_shard_subjects:\n", + " is_metadata: true\n", + " output_dir: ${cohort_dir}/metadata\n", + " n_subjects_per_shard: 10000\n", + " external_splits_json_fp: null\n", + " split_fracs:\n", + " train: 0.5\n", + " tuning: 0.25\n", + " held_out: 0.25\n", + " convert_to_sharded_events:\n", + " do_dedup_text_and_numeric: true\n", + " merge_to_MEDS_cohort:\n", + " unique_by: null\n", + " additional_sort_by: null\n", + " extract_code_metadata:\n", + " is_metadata: true\n", + " description_separator: '\n", + "\n", + " '\n", + " finalize_MEDS_metadata:\n", + " is_metadata: true\n", + " do_retype: true\n", + " finalize_MEDS_data:\n", + " do_retype: true\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: convert_to_sharded_events\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: eICU\n", + " dataset_version: 2.0\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: extract\n", + "description: \"This pipeline extracts the eICU dataset in longitudinal, sparse form\\\n", + " \\ from an input dataset meeting\\nselect criteria and converts them to the flattened,\\\n", + " \\ MEDS format. 
You can control the key arguments to this\\npipeline by setting environment\\\n", + " \\ variables:\\n```bash\\n export EVENT_CONVERSION_CONFIG_FP=# Path to your event\\\n", + " \\ conversion config\\n export EICU_PRE_MEDS_DIR=# Path to the output dir of the\\\n", + " \\ pre-MEDS step\\n export EICU_MEDS_COHORT_DIR=# Path to where you want the dataset\\\n", + " \\ to live\\n```\"\n", + "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", + "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", + "\n", + "2024-12-14 17:20:15.729 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "do_dedup_text_and_numeric: true\n", + "is_metadata: false\n", + "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", + "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events\n", + "reducer_output_dir: null\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", + " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events\n", + " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "2024-12-14 17:20:15.730 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:769 - Starting event conversion.\n", + "2024-12-14 17:20:15.730 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:771 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/event_configs.yaml\n", + "2024-12-14 17:20:15.779 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:773 - Event conversion config:\n", + "subject_id_col: patienthealthsystemstayid\n", 
+ "patient:\n", + " dob:\n", + " code: MEDS_BIRTH\n", + " time: col(dateofbirth)\n", + " uniquepid: uniquepid\n", + " gender:\n", + " code:\n", + " - GENDER\n", + " - col(gender)\n", + " time: null\n", + " ethnicity:\n", + " code:\n", + " - ETHNICITY\n", + " - col(ethnicity)\n", + " time: null\n", + " hosp_admission:\n", + " code:\n", + " - HOSPITAL_ADMISSION\n", + " - col(hospitaladmitsource)\n", + " - col(hospitalregion)\n", + " - col(hospitalteachingstatus)\n", + " - col(hospitalnumbedscategory)\n", + " time: col(hospitaladmittimestamp)\n", + " hospital_id: hospitalid\n", + " hosp_discharge:\n", + " code:\n", + " - HOSPITAL_DISCHARGE\n", + " - col(hospitaldischargestatus)\n", + " - col(hospitaldischargelocation)\n", + " time: col(hospitaldischargetimestamp)\n", + " unit_admission:\n", + " code:\n", + " - UNIT_ADMISSION\n", + " - col(unitadmitsource)\n", + " - col(unitstaytype)\n", + " time: col(unitadmittimestamp)\n", + " ward_id: wardid\n", + " unit_stay_id: patientunitstayid\n", + " unit_admission_weight:\n", + " code:\n", + " - UNIT_ADMISSION_WEIGHT\n", + " time: col(unitadmittimestamp)\n", + " numeric_value: unitadmissionweight\n", + " unit_admission_height:\n", + " code:\n", + " - UNIT_ADMISSION_HEIGHT\n", + " time: col(unitadmittimestamp)\n", + " numeric_value: unitadmissionheight\n", + " unit_discharge:\n", + " code:\n", + " - UNIT_DISCHARGE\n", + " - col(unitdischargestatus)\n", + " - col(unitdischargelocation)\n", + " time: col(unitdischargetimestamp)\n", + " unit_discharge_weight:\n", + " code:\n", + " - UNIT_DISCHARGE_WEIGHT\n", + " time: col(unitdischargetimestamp)\n", + " numeric_value: unitdischargeweight\n", + "admissionDx:\n", + " admission_diagnosis:\n", + " code:\n", + " - ADMISSION_DX\n", + " - col(admitdxname)\n", + " time: col(admitDxEnteredTimestamp)\n", + " admission_dx_id: admissiondxid\n", + " unit_stay_id: patientunitstayid\n", + "allergy:\n", + " allergy:\n", + " code:\n", + " - ALLERGY\n", + " - col(allergytype)\n", + " - 
col(allergyname)\n", + " time: col(allergyEnteredTimestamp)\n", + "carePlanGeneral:\n", + " cplItem:\n", + " code:\n", + " - CAREPLAN_GENERAL\n", + " - col(cplgroup)\n", + " - col(cplitemvalue)\n", + " time: col(carePlanGeneralItemEnteredTimestamp)\n", + "carePlanEOL:\n", + " cplEolDiscussion:\n", + " code:\n", + " - CAREPLAN_EOL\n", + " time: col(carePlanEolDiscussionOccurredTimestamp)\n", + "carePlanGoal:\n", + " cplGoal:\n", + " code:\n", + " - CAREPLAN_GOAL\n", + " - col(cplgoalcategory)\n", + " - col(cplgoalvalue)\n", + " - col(cplgoalstatus)\n", + " time: col(carePlanGoalEnteredTimestamp)\n", + "carePlanInfectiousDisease:\n", + " cplInfectDisease:\n", + " code:\n", + " - CAREPLAN_INFECTIOUS_DISEASE\n", + " - col(infectdiseasesite)\n", + " - col(infectdiseaseassessment)\n", + " - col(treatment)\n", + " - col(responsetotherapy)\n", + " time: col(carePlanInfectDiseaseEnteredTimestamp)\n", + "diagnosis:\n", + " diagnosis:\n", + " code:\n", + " - ICD9CM\n", + " - col(icd9code)\n", + " - col(diagnosispriority)\n", + " time: col(diagnosisEnteredTimestamp)\n", + " diagnosis_string: diagnosisstring\n", + "lab:\n", + " lab:\n", + " code:\n", + " - LAB\n", + " - col(labmeasurenamesystem)\n", + " - col(labmeasurenameinterface)\n", + " - col(labname)\n", + " time: col(labResultDrawnTimestamp)\n", + " numeric_value: labresult\n", + " text_value: labresulttext\n", + " lab_type_id: labtypeid\n", + "medication:\n", + " drug_ordered:\n", + " code:\n", + " - MEDICATION\n", + " - ORDERED\n", + " - col(drugname)\n", + " time: col(drugordertimestamp)\n", + " medication_id: medicationid\n", + " drug_iv_admixture: drugivadmixture\n", + " dosage: dosage\n", + " route_admin: routeadmin\n", + " frequency: frequency\n", + " loading_dose: loadingdose\n", + " prn: prn\n", + " gtc: gtc\n", + " drug_started:\n", + " code:\n", + " - MEDICATION\n", + " - STARTED\n", + " - col(drugname)\n", + " time: col(drugstarttimestamp)\n", + " medication_id: medicationid\n", + " drug_stopped:\n", + " 
code:\n", + " - MEDICATION\n", + " - STOPPED\n", + " - col(drugname)\n", + " time: col(drugstoptimestamp)\n", + " medication_id: medicationid\n", + "nurseAssessment:\n", + " nurse_assessment_performed:\n", + " code:\n", + " - NURSE_ASSESSMENT\n", + " - PERFORMED\n", + " - NOT YET DONE\n", + " time: col(nurseAssessPerformedTimestamp)\n", + " nurse_assessment_id: nurseassessid\n", + " cell_label: celllabel\n", + " cell_attribute: cellattribute\n", + " cell_attribute_value: cellattributevalue\n", + " nurse_assessment_entered:\n", + " code:\n", + " - NURSE_ASSESSMENT\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(nurseAssessEnteredTimestamp)\n", + " nurse_assessment_id: nurseassessid\n", + " cell_label: celllabel\n", + " cell_attribute: cellattribute\n", + " cell_attribute_value: cellattributevalue\n", + "nurseCare:\n", + " nurse_care_performed:\n", + " code:\n", + " - NURSE_CARE\n", + " - PERFORMED\n", + " - NOT YET DONE\n", + " time: col(nurseCarePerformedTimestamp)\n", + " nurse_care_id: nursecareid\n", + " cell_label: celllabel\n", + " cell_attribute: cellattribute\n", + " cell_attribute_value: cellattributevalue\n", + " nurse_care_entered:\n", + " code:\n", + " - NURSE_CARE\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(nurseCareEnteredTimestamp)\n", + " nurse_care_id: nursecareid\n", + " cell_label: celllabel\n", + " cell_attribute: cellattribute\n", + " cell_attribute_value: cellattributevalue\n", + "nurseCharting:\n", + " nurse_charting_performed:\n", + " code:\n", + " - NURSE_CHARTING\n", + " - PERFORMED\n", + " - NOT YET DONE\n", + " time: col(nursingChartPerformedTimestamp)\n", + " nurse_charting_id: nursingchartid\n", + " cell_type_cat: nursingchartcelltypecat\n", + " cell_type_val_name: nursingchartcelltypevalname\n", + " cell_type_val_label: nursingchartcelltypevallabel\n", + " cell_value: nursingchartvalue\n", + " nurse_charting_entered:\n", + " code:\n", + " - NURSE_CHARTING\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: 
col(nursingChartEnteredTimestamp)\n", + " nurse_charting_id: nursingchartid\n", + " cell_type_cat: nursingchartcelltypecat\n", + " cell_type_val_name: nursingchartcelltypevalname\n", + " cell_type_val_label: nursingchartcelltypevallabel\n", + " cell_value: nursingchartvalue\n", + "pastHistory:\n", + " past_history_taken:\n", + " code:\n", + " - PAST_HISTORY\n", + " - TAKEN\n", + " - NOT YET DONE\n", + " time: col(pastHistoryTakenTimestamp)\n", + " past_history_id: pasthistoryid\n", + " note_type: pasthistorynotetype\n", + " path: pasthistorypath\n", + " value: pasthistoryvalue\n", + " value_text: pasthistoryvaluetext\n", + " past_history_entered:\n", + " code:\n", + " - PAST_HISTORY\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(pastHistoryEnteredTimestamp)\n", + " past_history_id: pasthistoryid\n", + " note_type: pasthistorynotetype\n", + " path: pasthistorypath\n", + " value: pasthistoryvalue\n", + " value_text: pasthistoryvaluetext\n", + "physicalExam:\n", + " physical_exam_entered:\n", + " code:\n", + " - PHYSICAL_EXAM\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(physicalExamEnteredTimestamp)\n", + " physical_exam_id: physicalexamid\n", + " text: physicalexamtext\n", + " path: physicalexampath\n", + " value: physicalexamvalue\n", + "respiratoryCare:\n", + " resp_care_status:\n", + " code:\n", + " - RESP_CARE\n", + " - STATUS\n", + " - NOT YET DONE\n", + " time: col(respCareStatusEnteredTimestamp)\n", + " resp_care_id: respcareid\n", + " airwaytype: airwaytype\n", + " airwaysize: airwaysize\n", + " airwayposition: airwayposition\n", + " cuffpressure: cuffpressure\n", + " lowexhmvlimit: lowexhmvlimit\n", + " hiexhmvlimit: hiexhmvlimit\n", + " lowexhtvlimit: lowexhtvlimit\n", + " hipeakpreslimit: hipeakpreslimit\n", + " lowpeakpreslimit: lowpeakpreslimit\n", + " hirespratelimit: hirespratelimit\n", + " lowrespratelimit: lowrespratelimit\n", + " sighpreslimit: sighpreslimit\n", + " lowironoxlimit: lowironoxlimit\n", + " highironoxlimit: 
highironoxlimit\n", + " meanairwaypreslimit: meanairwaypreslimit\n", + " peeplimit: peeplimit\n", + " cpaplimit: cpaplimit\n", + " setapneainterval: setapneainterval\n", + " setapneatv: setapneatv\n", + " setapneaippeephigh: setapneaippeephigh\n", + " setapnearr: setapnearr\n", + " setapneapeakflow: setapneapeakflow\n", + " setapneainsptime: setapneainsptime\n", + " setapneaie: setapneaie\n", + " setapneafio2: setapneafio2\n", + " vent_start:\n", + " code:\n", + " - VENT\n", + " - START\n", + " - NOT YET DONE\n", + " time: col(ventStartTimestamp)\n", + " resp_care_id: respcareid\n", + " vent_end:\n", + " code:\n", + " - VENT\n", + " - END\n", + " - NOT YET DONE\n", + " time: col(ventEndTimestamp)\n", + " resp_care_id: respcareid\n", + "respiratoryCharting:\n", + " resp_charting_performed:\n", + " code:\n", + " - RESP_CHARTING\n", + " - PERFORMED\n", + " - NOT YET DONE\n", + " time: col(respChartPerformedTimestamp)\n", + " resp_chart_id: respchartid\n", + " type_cat: respcharttypecat\n", + " value_label: respchartvaluelabel\n", + " value: respchartvalue\n", + " resp_charting_entered:\n", + " code:\n", + " - RESP_CHARTING\n", + " - ENTERED\n", + " - NOT YET DONE\n", + " time: col(respChartEnteredTimestamp)\n", + " resp_chart_id: respchartid\n", + " type_cat: respcharttypecat\n", + " value_label: respchartvaluelabel\n", + " value: respchartvalue\n", + "treatment:\n", + " treatment:\n", + " code:\n", + " - TREATMENT\n", + " - ENTERED\n", + " - col(treatmentstring)\n", + " time: col(treatmentEnteredTimestamp)\n", + " treatment_id: treatmentid\n", + "vitalAperiodic:\n", + " non_invasive_systolic:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - BP\n", + " - NONINVASIVE_SYSTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: noninvasivesystolic\n", + " non_invasive_diastolic:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - BP\n", + " - NONINVASIVE_DIASTOLIC\n", + " time: 
col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: noninvasivediastolic\n", + " non_invasive_mean:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - BP\n", + " - NONINVASIVE_MEAN\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: noninvasivemean\n", + " paop:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - PAOP\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: paop\n", + " cardiac_output:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - CARDIAC_OUTPUT\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: cardiacoutput\n", + " cardiac_input:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - CARDIAC_INPUT\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: cardiacinput\n", + " svr:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - SVR\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: svr\n", + " svri:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - SVRI\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: svri\n", + " pvr:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - PVR\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: pvr\n", + " pvri:\n", + " code:\n", + " - VITALS\n", + " - APERIODIC\n", + " - PVRI\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalaperiodicid\n", + " numeric_value: pvri\n", + "vitalPeriodic:\n", + " temperature:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - TEMPERATURE\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: temperature\n", + " saO2:\n", + " code:\n", + " - 
VITALS\n", + " - PERIODIC\n", + " - SAO2\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: sao2\n", + " heartRate:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - HEARTRATE\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: heartrate\n", + " respiration:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - RESPIRATION\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: respiration\n", + " cvp:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - CVP\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: cvp\n", + " etCo2:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ETCO2\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: etco2\n", + " systemic_systolic:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - SYSTEMIC_SYSTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: systemicsystolic\n", + " systemic_diastolic:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - SYSTEMIC_DIASTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: systemicdiastolic\n", + " systemic_mean:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - SYSTEMIC_MEAN\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: systemicmean\n", + " pa_systolic:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - PULM_ART_SYSTOLIC\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: pasystolic\n", + " pa_diastolic:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - PULM_ART_DIASTOLIC\n", + " 
time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: padiastolic\n", + " pa_mean:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - BP\n", + " - PULM_ART_MEAN\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: pamean\n", + " st1:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ST1\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: st1\n", + " st2:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ST2\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: st2\n", + " st3:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ST3\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: st3\n", + " ICP:\n", + " code:\n", + " - VITALS\n", + " - PERIODIC\n", + " - ICP\n", + " time: col(observationEnteredTimestamp)\n", + " vital_id: vitalperiodicid\n", + " numeric_value: icp\n", + "\n", + "2024-12-14 17:20:15.793 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:15.794 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:15.794543. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:15.795 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:15.796 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:802: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " typed_subjects = pl.Series(subjects, dtype=df.schema[input_subject_id_column])\n", + "2024-12-14 17:20:15.802 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:15.806 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_performed\n", + "2024-12-14 17:20:15.807 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCarePerformedTimestamp should already be in Date/time format\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:513: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " if v not in df.schema:\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:517: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " is_numeric = df.schema[v].is_numeric()\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:518: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " is_str = df.schema[v] == pl.Utf8\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:519: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " is_cat = isinstance(df.schema[v], pl.Categorical)\n", + "2024-12-14 17:20:15.809 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCarePerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:15.809 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_entered\n", + "2024-12-14 17:20:15.809 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCareEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:15.809 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCareEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:15.810 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:15.857 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 
0:00:00.063106\n", + "2024-12-14 17:20:15.857 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/.[0-42080).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/.[0-42080).parquet_cache/locks/2024-12-14T17:20:15.794543.json\n", + "2024-12-14 17:20:15.858 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:15.858 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:15.858597. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:15.858 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:15.858 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:15.859 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:15.862 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplEolDiscussion\n", + "2024-12-14 17:20:15.862 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanEolDiscussionOccurredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:15.862 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out 
rows with null times via col(\"carePlanEolDiscussionOccurredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:15.863 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:15.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007113\n", + "2024-12-14 17:20:15.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/.[0-15).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/.[0-15).parquet_cache/locks/2024-12-14T17:20:15.858597.json\n", + "2024-12-14 17:20:15.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:15.866 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:15.866664. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:15.866 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:15.867 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:15.867 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:15.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_performed\n", + "2024-12-14 17:20:15.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:15.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:15.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_entered\n", + "2024-12-14 17:20:15.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:15.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:15.872 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:16.546 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.680040\n", + "2024-12-14 17:20:16.546 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/.[0-1477163).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/.[0-1477163).parquet_cache/locks/2024-12-14T17:20:15.866664.json\n", + "2024-12-14 17:20:16.548 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:16.549 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.549008. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:16.549 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:16.549 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:16.550 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:16.555 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_systolic\n", + "2024-12-14 17:20:16.555 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_diastolic\n", + "2024-12-14 17:20:16.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_mean\n", + "2024-12-14 17:20:16.557 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting paop\n", + "2024-12-14 17:20:16.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_output\n", + "2024-12-14 17:20:16.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_input\n", + "2024-12-14 17:20:16.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.558 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svr\n", + "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svri\n", + "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvr\n", + "2024-12-14 17:20:16.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.560 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvri\n", + "2024-12-14 17:20:16.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.560 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:16.862 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.313909\n", + "2024-12-14 17:20:16.863 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/.[0-274088).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/.[0-274088).parquet_cache/locks/2024-12-14T17:20:16.549008.json\n", + "2024-12-14 17:20:16.863 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:16.864 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.864327. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:16.864 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:16.864 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:16.865 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:16.868 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_performed\n", + "2024-12-14 17:20:16.869 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.869 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.869 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_entered\n", + "2024-12-14 17:20:16.869 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.870 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.870 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:16.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.048560\n", + "2024-12-14 17:20:16.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/.[0-91589).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/.[0-91589).parquet_cache/locks/2024-12-14T17:20:16.864327.json\n", + "2024-12-14 17:20:16.913 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/[0-75604).parquet\n", + "2024-12-14 17:20:16.914 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.914149. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:16.914 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", + "2024-12-14 17:20:16.914 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:16.914 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for medication/[0-75604).parquet\n", + "2024-12-14 17:20:16.918 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_ordered\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:468: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " if col not in df.schema:\n", + "2024-12-14 17:20:16.919 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:16.919 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugordertimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugordertimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_started\n", + "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - 
drugstarttimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstarttimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_stopped\n", + "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstoptimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.921 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstoptimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.921 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/[0-75604).parquet\n", + "2024-12-14 17:20:16.976 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.062129\n", + "2024-12-14 17:20:16.976 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/.[0-75604).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/.[0-75604).parquet_cache/locks/2024-12-14T17:20:16.914149.json\n", + "2024-12-14 17:20:16.976 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:16.977 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.977324. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:16.977 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:16.977 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:16.978 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:16.981 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 17:20:16.982 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column diagnosispriority\n", + "2024-12-14 17:20:16.982 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd9code\n", + "2024-12-14 17:20:16.982 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - diagnosisEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.982 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"diagnosisEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.982 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:16.989 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012023\n", + "2024-12-14 17:20:16.989 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/.[0-24978).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/.[0-24978).parquet_cache/locks/2024-12-14T17:20:16.977324.json\n", + "2024-12-14 17:20:16.989 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:16.990 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.990072. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:16.990 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:16.990 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:16.990 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for allergy/[0-2475).parquet\n", + "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting allergy\n", + "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergyname\n", + "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergytype\n", + "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - allergyEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"allergyEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:16.997 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007124\n", + "2024-12-14 17:20:16.997 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/.[0-2475).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/.[0-2475).parquet_cache/locks/2024-12-14T17:20:16.990072.json\n", + "2024-12-14 17:20:16.997 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:16.998 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.998026. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:16.998 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:16.998 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:16.998 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplInfectDisease\n", + "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseaseassessment\n", + "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column responsetotherapy\n", + "2024-12-14 17:20:17.002 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseasesite\n", + "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatment\n", + "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanInfectDiseaseEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanInfectDiseaseEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:17.004 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006326\n", + "2024-12-14 17:20:17.004 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/.[0-112).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/.[0-112).parquet_cache/locks/2024-12-14T17:20:16.998026.json\n", + "2024-12-14 17:20:17.004 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:17.005 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.005067. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.005 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:17.005 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.005 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:17.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting physical_exam_entered\n", + "2024-12-14 17:20:17.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - physicalExamEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"physicalExamEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.009 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:17.032 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.026926\n", + "2024-12-14 17:20:17.032 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/.[0-84058).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/.[0-84058).parquet_cache/locks/2024-12-14T17:20:17.005067.json\n", + "2024-12-14 17:20:17.032 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:17.033 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.032910. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.033 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:17.033 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for treatment/[0-38290).parquet\n", + "2024-12-14 17:20:17.037 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting treatment\n", + "2024-12-14 17:20:17.037 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatmentstring\n", + "2024-12-14 17:20:17.037 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - treatmentEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.038 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"treatmentEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.038 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:17.046 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013724\n", + "2024-12-14 17:20:17.046 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/.[0-38290).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/.[0-38290).parquet_cache/locks/2024-12-14T17:20:17.032910.json\n", + "2024-12-14 17:20:17.047 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:17.047 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.047613. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.047 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:17.047 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.048 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:17.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_taken\n", + "2024-12-14 17:20:17.052 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryTakenTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.052 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryTakenTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.052 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_entered\n", + "2024-12-14 17:20:17.053 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.053 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.053 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 
17:20:17.063 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.015651\n", + "2024-12-14 17:20:17.063 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/.[0-12109).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/.[0-12109).parquet_cache/locks/2024-12-14T17:20:17.047613.json\n", + "2024-12-14 17:20:17.064 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:17.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.064617. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:17.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.065 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:17.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplItem\n", + "2024-12-14 17:20:17.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgroup\n", + "2024-12-14 17:20:17.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplitemvalue\n", + "2024-12-14 17:20:17.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGeneralItemEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.069 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGeneralItemEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.069 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:17.075 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010798\n", + "2024-12-14 17:20:17.075 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/.[0-33148).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/.[0-33148).parquet_cache/locks/2024-12-14T17:20:17.064617.json\n", + "2024-12-14 17:20:17.075 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:17.076 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.076091. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.076 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:17.076 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.076 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:17.079 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplGoal\n", + "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalcategory\n", + "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalvalue\n", + "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
cplgoalstatus\n", + "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGoalEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGoalEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:17.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007029\n", + "2024-12-14 17:20:17.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/.[0-3633).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/.[0-3633).parquet_cache/locks/2024-12-14T17:20:17.076091.json\n", + "2024-12-14 17:20:17.084 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:17.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.084405. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:17.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:17.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_care_status\n", + "2024-12-14 17:20:17.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respCareStatusEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respCareStatusEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_start\n", + "2024-12-14 17:20:17.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventStartTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventStartTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_end\n", + "2024-12-14 17:20:17.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - 
ventEndTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventEndTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.093 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:17.103 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.018818\n", + "2024-12-14 17:20:17.103 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/.[0-5436).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/.[0-5436).parquet_cache/locks/2024-12-14T17:20:17.084405.json\n", + "2024-12-14 17:20:17.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:17.103 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.103882. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.104 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:17.104 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.104 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:17.107 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission_diagnosis\n", + "2024-12-14 17:20:17.108 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admitdxname\n", + "2024-12-14 17:20:17.108 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - admitDxEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.108 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admitDxEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.108 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:17.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007481\n", + "2024-12-14 17:20:17.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/.[0-7578).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/.[0-7578).parquet_cache/locks/2024-12-14T17:20:17.103882.json\n", + "2024-12-14 17:20:17.111 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/[0-434660).parquet\n", + "2024-12-14 17:20:17.112 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.112132. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.112 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", + "2024-12-14 17:20:17.112 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.112 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for lab/[0-434660).parquet\n", + "2024-12-14 17:20:17.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 17:20:17.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labname\n", + "2024-12-14 17:20:17.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenamesystem\n", + "2024-12-14 17:20:17.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenameinterface\n", + "2024-12-14 17:20:17.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - labResultDrawnTimestamp should already be in Date/time format\n", + 
"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:529: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + " case \"text_value\" if not df.schema[v] == pl.Utf8:\n", + "2024-12-14 17:20:17.117 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"labResultDrawnTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.117 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/[0-434660).parquet\n", + "2024-12-14 17:20:17.185 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.073375\n", + "2024-12-14 17:20:17.185 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/.[0-434660).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/.[0-434660).parquet_cache/locks/2024-12-14T17:20:17.112132.json\n", + "2024-12-14 17:20:17.186 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/[0-2520).parquet\n", + "2024-12-14 17:20:17.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.187269. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", + "2024-12-14 17:20:17.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.188 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for patient/[0-2520).parquet\n", + "2024-12-14 17:20:17.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 17:20:17.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - dateofbirth should already be in Date/time format\n", + "2024-12-14 17:20:17.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dateofbirth\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ethnicity\n", + "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column ethnicity\n", + "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 
17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_admission\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaladmitsource\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalteachingstatus\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalregion\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalnumbedscategory\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaladmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hospitaladmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_discharge\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargestatus\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargelocation\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaldischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"hospitaldischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission\n", + "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitstaytype\n", + "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitadmitsource\n", + "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_weight\n", + "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_height\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering 
out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargelocation\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargestatus\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge_weight\n", + "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.197 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/[0-2520).parquet\n", + "2024-12-14 17:20:17.211 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024675\n", + "2024-12-14 17:20:17.212 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/.[0-2520).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/.[0-2520).parquet_cache/locks/2024-12-14T17:20:17.187269.json\n", + "2024-12-14 17:20:17.212 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:17.212 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.212843. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.213 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:17.213 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.213 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:17.217 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_performed\n", + "2024-12-14 17:20:17.217 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.217 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null 
times via col(\"respChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.217 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_entered\n", + "2024-12-14 17:20:17.218 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.218 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.218 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:17.284 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.071501\n", + "2024-12-14 17:20:17.284 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/.[0-176089).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/.[0-176089).parquet_cache/locks/2024-12-14T17:20:17.212843.json\n", + "2024-12-14 17:20:17.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:17.288 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.288028. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:17.288 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:17.288 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:17.288 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:17.294 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting temperature\n", + "2024-12-14 17:20:17.294 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting saO2\n", + "2024-12-14 17:20:17.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting heartRate\n", + 
"2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting respiration\n", + "2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cvp\n", + "2024-12-14 17:20:17.297 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.297 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.297 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting etCo2\n", + "2024-12-14 17:20:17.297 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.297 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.298 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_systolic\n", + "2024-12-14 17:20:17.298 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.298 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.298 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_diastolic\n", + "2024-12-14 17:20:17.298 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_mean\n", + "2024-12-14 17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 
17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_systolic\n", + "2024-12-14 17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_diastolic\n", + "2024-12-14 17:20:17.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_mean\n", + "2024-12-14 17:20:17.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st1\n", + "2024-12-14 17:20:17.301 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st2\n", + "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st3\n", + "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ICP\n", + "2024-12-14 17:20:17.303 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:17.303 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out 
rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:17.303 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:20.250 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:02.962939\n", + "2024-12-14 17:20:20.251 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/.[0-1634960).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/.[0-1634960).parquet_cache/locks/2024-12-14T17:20:17.288028.json\n", + "2024-12-14 17:20:20.251 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:20.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.252404. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:20.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.253 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:20.257 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_performed\n", + "2024-12-14 17:20:20.257 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCarePerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.257 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCarePerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.257 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_entered\n", + "2024-12-14 17:20:20.258 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCareEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.258 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCareEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.258 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 
17:20:20.273 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.021446\n", + "2024-12-14 17:20:20.273 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/.[0-42080).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/.[0-42080).parquet_cache/locks/2024-12-14T17:20:20.252404.json\n", + "2024-12-14 17:20:20.274 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:20.274 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.274563. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.274 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:20.274 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.275 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:20.278 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplEolDiscussion\n", + "2024-12-14 17:20:20.278 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanEolDiscussionOccurredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.278 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanEolDiscussionOccurredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.278 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:20.280 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006231\n", + "2024-12-14 17:20:20.280 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/.[0-15).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/.[0-15).parquet_cache/locks/2024-12-14T17:20:20.274563.json\n", + "2024-12-14 
17:20:20.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:20.281 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.281752. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.282 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:20.282 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.282 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:20.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_performed\n", + "2024-12-14 17:20:20.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_entered\n", + "2024-12-14 17:20:20.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartEnteredTimestamp should 
already be in Date/time format\n", + "2024-12-14 17:20:20.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:20.622 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.341012\n", + "2024-12-14 17:20:20.622 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/.[0-1477163).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/.[0-1477163).parquet_cache/locks/2024-12-14T17:20:20.281752.json\n", + "2024-12-14 17:20:20.624 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:20.625 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.625045. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.625 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:20.625 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.625 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:20.630 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_systolic\n", + "2024-12-14 17:20:20.630 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_diastolic\n", + "2024-12-14 17:20:20.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_mean\n", + "2024-12-14 17:20:20.632 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting paop\n", + "2024-12-14 17:20:20.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_output\n", + "2024-12-14 17:20:20.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_input\n", + "2024-12-14 17:20:20.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.633 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svr\n", + "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svri\n", + "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvr\n", + "2024-12-14 17:20:20.635 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.635 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.635 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvri\n", + "2024-12-14 17:20:20.635 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.636 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.636 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:20.756 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.131885\n", + "2024-12-14 17:20:20.757 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/.[0-274088).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/.[0-274088).parquet_cache/locks/2024-12-14T17:20:20.625045.json\n", + "2024-12-14 17:20:20.757 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:20.758 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.758208. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.758 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:20.758 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.758 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:20.763 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_performed\n", + "2024-12-14 17:20:20.763 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.764 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.764 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_entered\n", + "2024-12-14 17:20:20.764 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.764 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.764 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:20.785 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.026920\n", + "2024-12-14 17:20:20.785 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/.[0-91589).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/.[0-91589).parquet_cache/locks/2024-12-14T17:20:20.758208.json\n", + "2024-12-14 17:20:20.786 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/[0-75604).parquet\n", + "2024-12-14 17:20:20.786 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.786460. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.786 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", + "2024-12-14 17:20:20.786 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.787 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for medication/[0-75604).parquet\n", + "2024-12-14 17:20:20.791 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_ordered\n", + "2024-12-14 17:20:20.791 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:20.791 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugordertimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugordertimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_started\n", + "2024-12-14 17:20:20.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:20.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstarttimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstarttimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.792 | INFO 
| MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_stopped\n", + "2024-12-14 17:20:20.793 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:20.793 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstoptimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.793 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstoptimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.793 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/[0-75604).parquet\n", + "2024-12-14 17:20:20.825 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.038700\n", + "2024-12-14 17:20:20.825 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/.[0-75604).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/.[0-75604).parquet_cache/locks/2024-12-14T17:20:20.786460.json\n", + "2024-12-14 17:20:20.825 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:20.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 
17:20:20.826051. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:20.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.826 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column diagnosispriority\n", + "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd9code\n", + "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - diagnosisEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"diagnosisEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:20.834 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008767\n", + "2024-12-14 17:20:20.834 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/.[0-24978).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/.[0-24978).parquet_cache/locks/2024-12-14T17:20:20.826051.json\n", + "2024-12-14 17:20:20.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:20.835 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.835439. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.835 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:20.835 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for allergy/[0-2475).parquet\n", + "2024-12-14 17:20:20.849 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting allergy\n", + "2024-12-14 17:20:20.849 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergyname\n", + "2024-12-14 17:20:20.849 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergytype\n", + "2024-12-14 17:20:20.850 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - allergyEnteredTimestamp should already be in Date/time 
format\n", + "2024-12-14 17:20:20.850 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"allergyEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.850 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:20.855 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.019692\n", + "2024-12-14 17:20:20.855 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/.[0-2475).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/.[0-2475).parquet_cache/locks/2024-12-14T17:20:20.835439.json\n", + "2024-12-14 17:20:20.858 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:20.860 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.859824. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.861 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:20.861 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplInfectDisease\n", + "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseaseassessment\n", + "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column responsetotherapy\n", + "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseasesite\n", + "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatment\n", + "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanInfectDiseaseEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanInfectDiseaseEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:20.869 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009974\n", + "2024-12-14 17:20:20.869 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/.[0-112).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/.[0-112).parquet_cache/locks/2024-12-14T17:20:20.859824.json\n", + "2024-12-14 17:20:20.870 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:20.870 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.870785. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.871 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:20.871 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:20.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting physical_exam_entered\n", + "2024-12-14 17:20:20.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - physicalExamEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"physicalExamEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.876 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:20.886 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016078\n", + "2024-12-14 17:20:20.886 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/.[0-84058).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/.[0-84058).parquet_cache/locks/2024-12-14T17:20:20.870785.json\n", + "2024-12-14 
17:20:20.887 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:20.887 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.887774. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.888 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:20.888 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.888 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for treatment/[0-38290).parquet\n", + "2024-12-14 17:20:20.892 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting treatment\n", + "2024-12-14 17:20:20.892 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatmentstring\n", + "2024-12-14 17:20:20.892 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - treatmentEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.892 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"treatmentEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.892 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:20.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010328\n", + "2024-12-14 17:20:20.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/.[0-38290).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/.[0-38290).parquet_cache/locks/2024-12-14T17:20:20.887774.json\n", + "2024-12-14 17:20:20.898 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:20.899 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.899071. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.899 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:20.899 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.899 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:20.903 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_taken\n", + "2024-12-14 17:20:20.903 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryTakenTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryTakenTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_entered\n", + "2024-12-14 17:20:20.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.905 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.905 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/[0-12109).parquet\n", + 
"2024-12-14 17:20:20.911 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012724\n", + "2024-12-14 17:20:20.911 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/.[0-12109).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/.[0-12109).parquet_cache/locks/2024-12-14T17:20:20.899071.json\n", + "2024-12-14 17:20:20.912 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:20.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.912592. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:20.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.913 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:20.916 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplItem\n", + "2024-12-14 17:20:20.917 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgroup\n", + "2024-12-14 17:20:20.917 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplitemvalue\n", + "2024-12-14 17:20:20.917 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGeneralItemEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.917 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGeneralItemEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.917 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:20.921 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008875\n", + "2024-12-14 17:20:20.921 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/.[0-33148).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/.[0-33148).parquet_cache/locks/2024-12-14T17:20:20.912592.json\n", + "2024-12-14 17:20:20.921 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:20.922 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.922179. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.922 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:20.922 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:20.926 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplGoal\n", + "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalcategory\n", + "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalvalue\n", + "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
cplgoalstatus\n", + "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGoalEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGoalEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:20.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007137\n", + "2024-12-14 17:20:20.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/.[0-3633).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/.[0-3633).parquet_cache/locks/2024-12-14T17:20:20.922179.json\n", + "2024-12-14 17:20:20.930 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:20.930 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.930594. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.930 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:20.930 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.931 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:20.935 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_care_status\n", + "2024-12-14 17:20:20.935 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respCareStatusEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.938 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respCareStatusEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.938 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_start\n", + "2024-12-14 17:20:20.938 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventStartTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.939 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventStartTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.939 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_end\n", + "2024-12-14 17:20:20.939 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - 
ventEndTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.939 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventEndTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.939 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:20.948 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.018362\n", + "2024-12-14 17:20:20.949 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/.[0-5436).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/.[0-5436).parquet_cache/locks/2024-12-14T17:20:20.930594.json\n", + "2024-12-14 17:20:20.949 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:20.949 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.949702. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.949 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:20.950 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.950 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:20.953 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission_diagnosis\n", + "2024-12-14 17:20:20.954 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admitdxname\n", + "2024-12-14 17:20:20.954 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - admitDxEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:20.954 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admitDxEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.954 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:20.957 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007359\n", + "2024-12-14 17:20:20.957 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/.[0-7578).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/.[0-7578).parquet_cache/locks/2024-12-14T17:20:20.949702.json\n", + "2024-12-14 17:20:20.957 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/[0-434660).parquet\n", + "2024-12-14 17:20:20.958 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.958027. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:20.958 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", + "2024-12-14 17:20:20.958 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:20.958 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for lab/[0-434660).parquet\n", + "2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labname\n", + "2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenamesystem\n", + "2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenameinterface\n", + "2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - labResultDrawnTimestamp should already be in Date/time format\n", + 
"2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"labResultDrawnTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:20.963 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/[0-434660).parquet\n", + "2024-12-14 17:20:20.999 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.041938\n", + "2024-12-14 17:20:21.000 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/.[0-434660).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/.[0-434660).parquet_cache/locks/2024-12-14T17:20:20.958027.json\n", + "2024-12-14 17:20:21.001 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/[0-2520).parquet\n", + "2024-12-14 17:20:21.002 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:21.001855. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:21.002 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", + "2024-12-14 17:20:21.002 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:21.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for patient/[0-2520).parquet\n", + "2024-12-14 17:20:21.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 17:20:21.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - dateofbirth should already be in Date/time format\n", + "2024-12-14 17:20:21.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dateofbirth\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ethnicity\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column ethnicity\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 
17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_admission\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaladmitsource\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalteachingstatus\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalregion\n", + "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalnumbedscategory\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaladmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hospitaladmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_discharge\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargestatus\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargelocation\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaldischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"hospitaldischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitstaytype\n", + "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitadmitsource\n", + "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_weight\n", + "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_height\n", + "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering 
out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargelocation\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargestatus\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge_weight\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/[0-2520).parquet\n", + "2024-12-14 17:20:21.027 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.025725\n", + "2024-12-14 17:20:21.027 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/.[0-2520).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/.[0-2520).parquet_cache/locks/2024-12-14T17:20:21.001855.json\n", + "2024-12-14 17:20:21.028 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:21.028 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:21.028566. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:21.028 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:21.028 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:21.029 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:21.032 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_performed\n", + "2024-12-14 17:20:21.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows 
with null times via col(\"respChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_entered\n", + "2024-12-14 17:20:21.034 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.034 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.034 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:21.075 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.046489\n", + "2024-12-14 17:20:21.075 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/.[0-176089).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/.[0-176089).parquet_cache/locks/2024-12-14T17:20:21.028566.json\n", + "2024-12-14 17:20:21.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:21.078 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:21.078339. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:21.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:21.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:21.079 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:21.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting temperature\n", + "2024-12-14 17:20:21.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting saO2\n", + "2024-12-14 17:20:21.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting heartRate\n", + 
"2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting respiration\n", + "2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cvp\n", + "2024-12-14 17:20:21.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting etCo2\n", + "2024-12-14 17:20:21.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.089 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_systolic\n", + "2024-12-14 17:20:21.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_diastolic\n", + "2024-12-14 17:20:21.090 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.090 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.090 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_mean\n", + "2024-12-14 17:20:21.090 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 
17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_systolic\n", + "2024-12-14 17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_diastolic\n", + "2024-12-14 17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_mean\n", + "2024-12-14 17:20:21.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st1\n", + "2024-12-14 17:20:21.093 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st2\n", + "2024-12-14 17:20:21.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st3\n", + "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ICP\n", + "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out 
rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:22.505 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:01.427590\n", + "2024-12-14 17:20:22.506 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/.[0-1634960).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/.[0-1634960).parquet_cache/locks/2024-12-14T17:20:21.078339.json\n", + "2024-12-14 17:20:22.507 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:22.507 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.507490. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:22.507 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:22.507 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:22.508 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCare/[0-42080).parquet\n", + "2024-12-14 17:20:22.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_performed\n", + "2024-12-14 17:20:22.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCarePerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCarePerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_entered\n", + "2024-12-14 17:20:22.513 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCareEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.513 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCareEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.513 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 
17:20:22.523 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.015754\n", + "2024-12-14 17:20:22.523 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/.[0-42080).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/.[0-42080).parquet_cache/locks/2024-12-14T17:20:22.507490.json\n", + "2024-12-14 17:20:22.523 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:22.524 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.524001. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:22.524 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:22.524 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:22.524 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:22.528 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplEolDiscussion\n", + "2024-12-14 17:20:22.528 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanEolDiscussionOccurredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.528 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanEolDiscussionOccurredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.528 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 17:20:22.529 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005803\n", + "2024-12-14 17:20:22.529 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/.[0-15).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/.[0-15).parquet_cache/locks/2024-12-14T17:20:22.524001.json\n", + "2024-12-14 17:20:22.530 | 
INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:22.530 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.530690. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:22.530 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:22.530 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:22.531 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:22.534 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_performed\n", + "2024-12-14 17:20:22.535 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.535 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.535 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_entered\n", + "2024-12-14 17:20:22.536 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartEnteredTimestamp should already be in 
Date/time format\n", + "2024-12-14 17:20:22.536 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.536 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 17:20:22.829 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.299273\n", + "2024-12-14 17:20:22.830 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/.[0-1477163).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/.[0-1477163).parquet_cache/locks/2024-12-14T17:20:22.530690.json\n", + "2024-12-14 17:20:22.832 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:22.832 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.832372. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:22.832 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:22.832 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:22.833 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:22.838 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_systolic\n", + "2024-12-14 17:20:22.838 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.838 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.838 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_diastolic\n", + "2024-12-14 17:20:22.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_mean\n", + "2024-12-14 17:20:22.839 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting paop\n", + "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_output\n", + "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_input\n", + "2024-12-14 17:20:22.841 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.841 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.841 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svr\n", + "2024-12-14 17:20:22.841 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.841 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.841 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svri\n", + "2024-12-14 17:20:22.842 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.842 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.842 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvr\n", + "2024-12-14 17:20:22.842 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.842 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.843 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvri\n", + "2024-12-14 17:20:22.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.843 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 17:20:22.960 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.128224\n", + "2024-12-14 17:20:22.960 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/.[0-274088).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/.[0-274088).parquet_cache/locks/2024-12-14T17:20:22.832372.json\n", + "2024-12-14 17:20:22.961 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:22.962 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.962000. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:22.962 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:22.962 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:22.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:22.966 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_performed\n", + "2024-12-14 17:20:22.966 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.967 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.967 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_entered\n", + "2024-12-14 17:20:22.967 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:22.967 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:22.968 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 17:20:22.990 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.028308\n", + "2024-12-14 17:20:22.991 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/.[0-91589).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/.[0-91589).parquet_cache/locks/2024-12-14T17:20:22.962000.json\n", + "2024-12-14 17:20:22.995 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/[0-75604).parquet\n", + "2024-12-14 17:20:22.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.996131. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:22.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", + "2024-12-14 17:20:22.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:22.998 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for medication/[0-75604).parquet\n", + "2024-12-14 17:20:23.005 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_ordered\n", + "2024-12-14 17:20:23.006 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:23.006 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugordertimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugordertimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_started\n", + "2024-12-14 17:20:23.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:23.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstarttimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstarttimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.008 | INFO 
| MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_stopped\n", + "2024-12-14 17:20:23.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 17:20:23.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstoptimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstoptimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.008 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/[0-75604).parquet\n", + "2024-12-14 17:20:23.039 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.043148\n", + "2024-12-14 17:20:23.039 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/.[0-75604).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/.[0-75604).parquet_cache/locks/2024-12-14T17:20:22.996131.json\n", + "2024-12-14 17:20:23.039 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:23.040 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.040126. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.040 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:23.040 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.040 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column diagnosispriority\n", + "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd9code\n", + "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - diagnosisEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"diagnosisEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 17:20:23.049 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008941\n", + "2024-12-14 17:20:23.049 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/.[0-24978).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/.[0-24978).parquet_cache/locks/2024-12-14T17:20:23.040126.json\n", + "2024-12-14 17:20:23.049 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:23.050 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.049948. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.050 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:23.050 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.050 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for allergy/[0-2475).parquet\n", + "2024-12-14 17:20:23.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting allergy\n", + "2024-12-14 17:20:23.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergyname\n", + "2024-12-14 17:20:23.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergytype\n", + "2024-12-14 17:20:23.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - allergyEnteredTimestamp should already be in Date/time 
format\n", + "2024-12-14 17:20:23.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"allergyEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.055 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/[0-2475).parquet\n", + "2024-12-14 17:20:23.056 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007014\n", + "2024-12-14 17:20:23.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/.[0-2475).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/.[0-2475).parquet_cache/locks/2024-12-14T17:20:23.049948.json\n", + "2024-12-14 17:20:23.057 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:23.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.057704. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:23.058 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.058 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:23.061 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplInfectDisease\n", + "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseaseassessment\n", + "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column responsetotherapy\n", + "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseasesite\n", + "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatment\n", + "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanInfectDiseaseEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanInfectDiseaseEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 17:20:23.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006415\n", + "2024-12-14 17:20:23.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/.[0-112).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/.[0-112).parquet_cache/locks/2024-12-14T17:20:23.057704.json\n", + "2024-12-14 17:20:23.064 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:23.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.064773. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.065 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:23.065 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.065 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:23.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting physical_exam_entered\n", + "2024-12-14 17:20:23.069 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - physicalExamEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.069 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"physicalExamEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.069 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 17:20:23.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016276\n", + "2024-12-14 17:20:23.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/.[0-84058).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/.[0-84058).parquet_cache/locks/2024-12-14T17:20:23.064773.json\n", + "2024-12-14 
17:20:23.081 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:23.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.081778. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.082 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:23.082 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.082 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for treatment/[0-38290).parquet\n", + "2024-12-14 17:20:23.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting treatment\n", + "2024-12-14 17:20:23.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatmentstring\n", + "2024-12-14 17:20:23.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - treatmentEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"treatmentEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.086 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/[0-38290).parquet\n", + "2024-12-14 17:20:23.091 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010117\n", + "2024-12-14 17:20:23.091 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/.[0-38290).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/.[0-38290).parquet_cache/locks/2024-12-14T17:20:23.081778.json\n", + "2024-12-14 17:20:23.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:23.092 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.092753. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.092 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:23.093 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for pastHistory/[0-12109).parquet\n", + "2024-12-14 17:20:23.096 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_taken\n", + "2024-12-14 17:20:23.097 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryTakenTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.097 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryTakenTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.097 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_entered\n", + "2024-12-14 17:20:23.098 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.098 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.098 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 
17:20:23.105 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012818\n", + "2024-12-14 17:20:23.105 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/.[0-12109).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/.[0-12109).parquet_cache/locks/2024-12-14T17:20:23.092753.json\n", + "2024-12-14 17:20:23.106 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:23.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.106417. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:23.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.106 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplItem\n", + "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgroup\n", + "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplitemvalue\n", + "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGeneralItemEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGeneralItemEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 17:20:23.115 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008873\n", + "2024-12-14 17:20:23.115 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/.[0-33148).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/.[0-33148).parquet_cache/locks/2024-12-14T17:20:23.106417.json\n", + "2024-12-14 17:20:23.115 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:23.116 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.116227. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.116 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:23.116 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:23.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplGoal\n", + "2024-12-14 17:20:23.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalcategory\n", + "2024-12-14 17:20:23.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalvalue\n", + "2024-12-14 17:20:23.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
cplgoalstatus\n", + "2024-12-14 17:20:23.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGoalEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.121 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGoalEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 17:20:23.123 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007319\n", + "2024-12-14 17:20:23.123 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/.[0-3633).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/.[0-3633).parquet_cache/locks/2024-12-14T17:20:23.116227.json\n", + "2024-12-14 17:20:23.124 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:23.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.124966. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:23.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.125 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:23.130 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_care_status\n", + "2024-12-14 17:20:23.130 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respCareStatusEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.133 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respCareStatusEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.133 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_start\n", + "2024-12-14 17:20:23.133 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventStartTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.134 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventStartTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.134 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_end\n", + "2024-12-14 17:20:23.134 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - 
ventEndTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.134 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventEndTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.134 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 17:20:23.143 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.018378\n", + "2024-12-14 17:20:23.143 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/.[0-5436).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/.[0-5436).parquet_cache/locks/2024-12-14T17:20:23.124966.json\n", + "2024-12-14 17:20:23.143 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:23.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.144139. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:23.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.144 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:23.148 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission_diagnosis\n", + "2024-12-14 17:20:23.148 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admitdxname\n", + "2024-12-14 17:20:23.148 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - admitDxEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admitDxEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.149 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 17:20:23.151 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007778\n", + "2024-12-14 17:20:23.151 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/.[0-7578).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/.[0-7578).parquet_cache/locks/2024-12-14T17:20:23.144139.json\n", + "2024-12-14 17:20:23.152 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/[0-434660).parquet\n", + "2024-12-14 17:20:23.152 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.152636. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.152 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", + "2024-12-14 17:20:23.152 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.153 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for lab/[0-434660).parquet\n", + "2024-12-14 17:20:23.156 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 17:20:23.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labname\n", + "2024-12-14 17:20:23.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenamesystem\n", + "2024-12-14 17:20:23.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenameinterface\n", + "2024-12-14 17:20:23.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - labResultDrawnTimestamp should already be in Date/time format\n", + 
"2024-12-14 17:20:23.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"labResultDrawnTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.157 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/[0-434660).parquet\n", + "2024-12-14 17:20:23.193 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.040408\n", + "2024-12-14 17:20:23.193 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/.[0-434660).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/.[0-434660).parquet_cache/locks/2024-12-14T17:20:23.152636.json\n", + "2024-12-14 17:20:23.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/[0-2520).parquet\n", + "2024-12-14 17:20:23.195 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.195057. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.195 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", + "2024-12-14 17:20:23.195 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for patient/[0-2520).parquet\n", + "2024-12-14 17:20:23.200 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 17:20:23.200 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - dateofbirth should already be in Date/time format\n", + "2024-12-14 17:20:23.200 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dateofbirth\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.200 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ethnicity\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column ethnicity\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 
17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_admission\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaladmitsource\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalteachingstatus\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalregion\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalnumbedscategory\n", + "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaladmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hospitaladmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_discharge\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargestatus\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargelocation\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaldischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"hospitaldischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitstaytype\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitadmitsource\n", + "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_weight\n", + "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_height\n", + "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering 
out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge\n", + "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargelocation\n", + "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargestatus\n", + "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge_weight\n", + "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/[0-2520).parquet\n", + "2024-12-14 17:20:23.220 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024942\n", + "2024-12-14 17:20:23.220 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/.[0-2520).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/.[0-2520).parquet_cache/locks/2024-12-14T17:20:23.195057.json\n", + "2024-12-14 17:20:23.220 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:23.221 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.221246. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.221 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:23.221 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.222 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:23.226 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_performed\n", + "2024-12-14 17:20:23.226 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.226 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with 
null times via col(\"respChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_entered\n", + "2024-12-14 17:20:23.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.227 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 17:20:23.268 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.047219\n", + "2024-12-14 17:20:23.268 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/.[0-176089).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/.[0-176089).parquet_cache/locks/2024-12-14T17:20:23.221246.json\n", + "2024-12-14 17:20:23.271 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:23.272 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.271971. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:23.272 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:23.272 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:23.272 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:23.278 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting temperature\n", + "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting saO2\n", + "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting heartRate\n", + 
"2024-12-14 17:20:23.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting respiration\n", + "2024-12-14 17:20:23.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cvp\n", + "2024-12-14 17:20:23.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting etCo2\n", + "2024-12-14 17:20:23.282 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.282 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.282 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_systolic\n", + "2024-12-14 17:20:23.282 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.283 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.283 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_diastolic\n", + "2024-12-14 17:20:23.283 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.283 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.283 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_mean\n", + "2024-12-14 17:20:23.284 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.284 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 
17:20:23.284 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_systolic\n", + "2024-12-14 17:20:23.284 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.284 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_diastolic\n", + "2024-12-14 17:20:23.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_mean\n", + "2024-12-14 17:20:23.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st1\n", + "2024-12-14 17:20:23.286 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st2\n", + "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st3\n", + "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ICP\n", + "2024-12-14 17:20:23.288 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 17:20:23.288 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out 
rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 17:20:23.288 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:24.698 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:01.426391\n", + "2024-12-14 17:20:24.698 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/.[0-1634960).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/.[0-1634960).parquet_cache/locks/2024-12-14T17:20:23.271971.json\n", + "2024-12-14 17:20:24.698 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:823 - Subsharded into converted events.\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:24.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: merge_to_MEDS_cohort\u001b[0m\n", + "\u001b[32m2024-12-14 17:20:24.863\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-merge_to_MEDS_cohort --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=merge_to_MEDS_cohort\u001b[0m\n", + "\u001b[32m2024-12-14 17:21:21.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - 
"\u001b[32m2024-12-14 16:18:53.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:18:08.184 | INFO | MEDS_transforms.utils:stage_init:73 - Running merge_to_MEDS_cohort with the following configuration:\n", + "\u001b[32m2024-12-14 17:21:21.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:20:25.493 | INFO | MEDS_transforms.utils:stage_init:73 - Running merge_to_MEDS_cohort with the following configuration:\n", "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -4097,7 +7176,7 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:18:08.199 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:20:25.508 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "unique_by: null\n", "additional_sort_by: null\n", "is_metadata: false\n", @@ -4110,13 +7189,39 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", - "2024-12-14 16:18:08.214 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator_by_shard_map:687 - Mapping computation over a maximum of 3 shards\n", - "2024-12-14 16:18:08.222 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", - "2024-12-14 16:18:08.224 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:302 - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet is in progress as of 2024-12-14 16:17:18.058267. Returning.\n", - "2024-12-14 16:18:08.224 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 16:18:08.224 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:18:08.224594. Double checking no earlier locks have been registered.\n", - "2024-12-14 16:18:08.225 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0\n", - "2024-12-14 16:18:08.226 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", + "2024-12-14 17:20:25.523 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator_by_shard_map:687 - Mapping computation over a maximum of 3 shards\n", + "2024-12-14 17:20:25.531 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 17:20:25.532 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:25.531974. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:25.533 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0\n", + "2024-12-14 17:20:25.535 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/[0-2520).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/[0-7578).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/[0-2475).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/[0-33148).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/[0-15).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/[0-3633).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/[0-112).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/[0-24978).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/[0-434660).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/[0-75604).parquet\n", + " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/[0-91589).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/[0-42080).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/[0-1477163).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/[0-12109).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/[0-84058).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/[0-5436).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/[0-176089).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/[0-38290).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/[0-274088).parquet\n", + " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 17:20:25.546 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:25.546 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 17:20:39.416 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:13.884657\n", + "2024-12-14 17:20:39.417 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - 
Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache/locks/2024-12-14T17:20:25.531974.json\n", + "2024-12-14 17:20:39.418 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 17:20:39.419 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:39.419421. Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:39.419 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0\n", + "2024-12-14 17:20:39.421 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/[0-2520).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/[0-7578).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/[0-2475).parquet\n", @@ -4137,14 +7242,14 @@ " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/[0-38290).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/[0-274088).parquet\n", " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 16:18:08.236 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:18:08.237 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 16:18:20.068 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:11.843473\n", - "2024-12-14 16:18:20.068 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache/locks/2024-12-14T16:18:08.224594.json\n", - "2024-12-14 16:18:20.069 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 16:18:20.070 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:18:20.070463. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:18:20.070 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0\n", - "2024-12-14 16:18:20.072 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", + "2024-12-14 17:20:39.432 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:39.433 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 17:20:49.043 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:09.624267\n", + "2024-12-14 17:20:49.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache/locks/2024-12-14T17:20:39.419421.json\n", + "2024-12-14 17:20:49.045 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 17:20:49.046 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:49.045956. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:20:49.046 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0\n", + "2024-12-14 17:20:49.047 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/[0-2520).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/[0-7578).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/[0-2475).parquet\n", @@ -4165,18 +7270,18 @@ " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/[0-38290).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/[0-274088).parquet\n", " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 16:18:20.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:18:20.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 16:18:52.696 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:32.625813\n", - "2024-12-14 16:18:52.698 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache/locks/2024-12-14T16:18:20.070463.json\n", - "2024-12-14 16:18:52.698 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:44.499135\n", + "2024-12-14 17:20:49.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:20:49.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 17:21:20.691 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:31.645753\n", + "2024-12-14 17:21:20.692 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache/locks/2024-12-14T17:20:49.045956.json\n", + "2024-12-14 17:21:20.693 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:55.184901\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:53.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:53.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:53.928\u001b[0m | \u001b[1mINFO \u001b[0m 
| \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:21:21.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 17:21:21.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 17:21:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:53.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:18:53.861 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_metadata with the following configuration:\n", + "\u001b[32m2024-12-14 17:21:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:21:21.837 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_metadata with the following configuration:\n", "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", @@ -4244,7 +7349,7 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:18:53.877 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:21:21.853 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "is_metadata: true\n", "do_retype: true\n", "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", @@ -4257,23 +7362,23 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/finalize_MEDS_metadata\n", " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", - "2024-12-14 16:18:53.881 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:170 - Validating code metadata\n", - "2024-12-14 16:18:53.881 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:179 - No code metadata found at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects/codes.parquet. 
Making empty metadata file.\n", - "2024-12-14 16:18:53.883 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:183 - Writing finalized metadata df to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/codes.parquet\n", - "2024-12-14 16:18:53.885 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:187 - Creating dataset metadata\n", - "2024-12-14 16:18:53.887 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:199 - Writing finalized dataset metadata to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/dataset.json\n", - "2024-12-14 16:18:53.887 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:204 - Creating subject splits from {str(shards_map_fp.resolve())}\n", - "2024-12-14 16:18:53.888 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split train has 1087 subjects\n", - "2024-12-14 16:18:53.888 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split tuning has 544 subjects\n", - "2024-12-14 16:18:53.888 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split held_out has 543 subjects\n", - "2024-12-14 16:18:53.889 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:222 - Writing finalized subject splits to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/subject_splits.parquet\n", + "2024-12-14 17:21:21.857 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:170 - Validating code metadata\n", + "2024-12-14 17:21:21.857 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:179 - No code metadata found at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects/codes.parquet. 
Making empty metadata file.\n", + "2024-12-14 17:21:21.859 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:183 - Writing finalized metadata df to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/codes.parquet\n", + "2024-12-14 17:21:21.861 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:187 - Creating dataset metadata\n", + "2024-12-14 17:21:21.863 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:199 - Writing finalized dataset metadata to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/dataset.json\n", + "2024-12-14 17:21:21.864 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:204 - Creating subject splits from {str(shards_map_fp.resolve())}\n", + "2024-12-14 17:21:21.865 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split train has 1087 subjects\n", + "2024-12-14 17:21:21.865 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split tuning has 544 subjects\n", + "2024-12-14 17:21:21.865 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split held_out has 543 subjects\n", + "2024-12-14 17:21:21.865 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:222 - Writing finalized subject splits to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/subject_splits.parquet\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:53.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_data\u001b[0m\n", - "\u001b[32m2024-12-14 16:18:53.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_data --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 
'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_data\u001b[0m\n", - "\u001b[32m2024-12-14 16:19:11.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 17:21:21.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_data\u001b[0m\n", + "\u001b[32m2024-12-14 17:21:21.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_data --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_data\u001b[0m\n", + "\u001b[32m2024-12-14 17:21:45.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 16:19:11.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 16:18:54.479 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_data with the following configuration:\n", + "\u001b[32m2024-12-14 17:21:45.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 17:21:22.492 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_data with the following configuration:\n", "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", @@ -4341,7 +7446,7 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 16:18:54.495 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 17:21:22.508 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "do_retype: true\n", "is_metadata: false\n", "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", @@ -4353,22 +7458,29 @@ " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data\n", " - metadata_input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata\n", - "2024-12-14 16:18:54.511 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 2 shards\n", - "2024-12-14 16:18:54.519 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/0.parquet\n", - "2024-12-14 16:18:54.520 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:18:54.520021. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:18:54.521 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 16:18:54.522 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:18:56.160 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/0.parquet\n", - "2024-12-14 16:18:59.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:04.605576\n", - "2024-12-14 16:18:59.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/.0.parquet_cache/locks/2024-12-14T16:18:54.520021.json\n", - "2024-12-14 16:18:59.331 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/0.parquet\n", - "2024-12-14 16:18:59.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 16:18:59.331903. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 16:18:59.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 16:18:59.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 16:19:03.318 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/0.parquet\n", - "2024-12-14 16:19:10.540 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:11.208637\n", - "2024-12-14 16:19:10.540 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/.0.parquet_cache/locks/2024-12-14T16:18:59.331903.json\n", - "2024-12-14 16:19:11.376 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:16.881477\n", + "2024-12-14 17:21:22.524 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", + "2024-12-14 17:21:22.532 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/held_out/0.parquet\n", + "2024-12-14 17:21:22.533 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:21:22.533510. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:21:22.535 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 17:21:22.535 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:21:24.217 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/held_out/0.parquet\n", + "2024-12-14 17:21:27.278 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:04.745218\n", + "2024-12-14 17:21:27.278 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/held_out/.0.parquet_cache/locks/2024-12-14T17:21:22.533510.json\n", + "2024-12-14 17:21:27.495 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/0.parquet\n", + "2024-12-14 17:21:27.495 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:21:27.495681. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:21:27.496 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 17:21:27.496 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:21:28.862 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/0.parquet\n", + "2024-12-14 17:21:31.908 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:04.412390\n", + "2024-12-14 17:21:31.908 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/.0.parquet_cache/locks/2024-12-14T17:21:27.495681.json\n", + "2024-12-14 17:21:32.132 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/0.parquet\n", + "2024-12-14 17:21:32.133 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:21:32.133050. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 17:21:32.133 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 17:21:32.133 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 17:21:36.556 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/0.parquet\n", + "2024-12-14 17:21:43.984 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:11.851649\n", + "2024-12-14 17:21:43.984 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/.0.parquet_cache/locks/2024-12-14T17:21:32.133050.json\n", + "2024-12-14 17:21:44.856 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:22.347847\n", "\u001b[0m\n" ] } @@ -4388,7 +7500,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 50, "metadata": {}, "outputs": [ { @@ -4401,20 +7513,20 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (25_221_384, 4)
subject_idtimecodenumeric_value
i64datetime[μs]strf32
129391null"GENDER//Female"null
129391null"ETHNICITY//Caucasian"null
1293911929-06-29 23:44:00"MEDS_BIRTH"null
1293911929-07-01 13:14:00"MEDS_BIRTH"null
1293912015-12-29 20:34:00"NURSE_CHARTING//PERFORMED//NOT…null
27362412015-12-31 13:00:00"NURSE_CHARTING//ENTERED//NOT Y…null
27362412015-12-31 13:00:00"RESP_CARE//STATUS//NOT YET DON…null
27362412015-12-31 19:29:00"HOSPITAL_DISCHARGE//Alive//Hom…null
27362412015-12-31 23:43:00"MEDICATION//STOPPED//MORPHINE …null
27362412016-01-01 13:25:00"MEDICATION//STOPPED//KETOROLAC…null
" + "shape: (33_392_344, 4)
subject_idtimecodenumeric_value
i64datetime[μs]strf32
131736null"GENDER//Female"null
131736null"ETHNICITY//Caucasian"null
1317361982-07-02 02:07:00"MEDS_BIRTH"null
1317362015-12-30 22:52:00"LAB//mg/dL//mg/dL//glucose"99.0
1317362015-12-30 22:52:00"LAB//mg/dL//mg/dL//calcium"8.8
27362412015-12-31 13:00:00"NURSE_CHARTING//ENTERED//NOT Y…null
27362412015-12-31 13:00:00"RESP_CARE//STATUS//NOT YET DON…null
27362412015-12-31 19:29:00"HOSPITAL_DISCHARGE//Alive//Hom…null
27362412015-12-31 23:43:00"MEDICATION//STOPPED//MORPHINE …null
27362412016-01-01 13:25:00"MEDICATION//STOPPED//KETOROLAC…null
" ], "text/plain": [ - "shape: (25_221_384, 4)\n", + "shape: (33_392_344, 4)\n", "┌────────────┬─────────────────────┬─────────────────────────────────┬───────────────┐\n", "│ subject_id ┆ time ┆ code ┆ numeric_value │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ i64 ┆ datetime[μs] ┆ str ┆ f32 │\n", "╞════════════╪═════════════════════╪═════════════════════════════════╪═══════════════╡\n", - "│ 129391 ┆ null ┆ GENDER//Female ┆ null │\n", - "│ 129391 ┆ null ┆ ETHNICITY//Caucasian ┆ null │\n", - "│ 129391 ┆ 1929-06-29 23:44:00 ┆ MEDS_BIRTH ┆ null │\n", - "│ 129391 ┆ 1929-07-01 13:14:00 ┆ MEDS_BIRTH ┆ null │\n", - "│ 129391 ┆ 2015-12-29 20:34:00 ┆ NURSE_CHARTING//PERFORMED//NOT… ┆ null │\n", + "│ 131736 ┆ null ┆ GENDER//Female ┆ null │\n", + "│ 131736 ┆ null ┆ ETHNICITY//Caucasian ┆ null │\n", + "│ 131736 ┆ 1982-07-02 02:07:00 ┆ MEDS_BIRTH ┆ null │\n", + "│ 131736 ┆ 2015-12-30 22:52:00 ┆ LAB//mg/dL//mg/dL//glucose ┆ 99.0 │\n", + "│ 131736 ┆ 2015-12-30 22:52:00 ┆ LAB//mg/dL//mg/dL//calcium ┆ 8.8 │\n", "│ … ┆ … ┆ … ┆ … │\n", "│ 2736241 ┆ 2015-12-31 13:00:00 ┆ NURSE_CHARTING//ENTERED//NOT Y… ┆ null │\n", "│ 2736241 ┆ 2015-12-31 13:00:00 ┆ RESP_CARE//STATUS//NOT YET DON… ┆ null │\n", @@ -4424,7 +7536,7 @@ "└────────────┴─────────────────────┴─────────────────────────────────┴───────────────┘" ] }, - "execution_count": 35, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } From d960275f9e9b91cf7d98a43f9b1114a468190a24 Mon Sep 17 00:00:00 2001 From: Nassim Oufattole Date: Sat, 14 Dec 2024 19:46:10 -0800 Subject: [PATCH 4/8] Removed .py files since they do not render correctly on github, and switched them to ipynb files. 
Updated meds-tab file usage --- demo/README.MD | 19 + demo/aces.ipynb | 3 +- demo/meds_cehrbert.ipynb | 295 +++++ demo/meds_cehrbert.py | 397 ------- demo/meds_tab.ipynb | 2252 ++++++++++++++++++++++++++++++++++++-- demo/meds_tab.py | 240 ---- 6 files changed, 2472 insertions(+), 734 deletions(-) create mode 100644 demo/README.MD create mode 100644 demo/meds_cehrbert.ipynb delete mode 100644 demo/meds_cehrbert.py delete mode 100644 demo/meds_tab.py diff --git a/demo/README.MD b/demo/README.MD new file mode 100644 index 0000000..0a242b7 --- /dev/null +++ b/demo/README.MD @@ -0,0 +1,19 @@ +# Demo Notebooks Guide + +## Data Extraction +1. Run `extract_meds_data.ipynb` to convert datasets to MEDS format: + - MIMIC-IV demo dataset + - EICU dataset + +Extracted data locations: +- EICU: `demo/work_dir/eicu_demo/meds` +- MIMIC-IV: `demo/work_dir/mimiciv_demo/meds` + +## Task Label Generation +2. Run `aces.ipynb` to generate prediction task labels + +## Model Experiments +3. Run any of these notebooks for model experiments and AUC results: +- `meds_cehrbert.ipynb` - CEHR-BERT implementation +- `meds_tab.ipynb` - Tabular models +- `meds_torch.ipynb` - PyTorch models diff --git a/demo/aces.ipynb b/demo/aces.ipynb index db13aab..03bacc3 100644 --- a/demo/aces.ipynb +++ b/demo/aces.ipynb @@ -72,7 +72,6 @@ ], "source": [ "#@title Download E-ICU demo\n", - "import tempfile\n", "import os\n", "from pathlib import Path\n", "notebook_dir = os.getcwd()\n", @@ -87,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "id": "H6fqe217XDhi" }, diff --git a/demo/meds_cehrbert.ipynb b/demo/meds_cehrbert.ipynb new file mode 100644 index 0000000..a378210 --- /dev/null +++ b/demo/meds_cehrbert.ipynb @@ -0,0 +1,295 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e9226e6d", + "metadata": { + "lines_to_next_cell": 0 + }, + "source": [ + "## Use a python 3.11 kernel cehrbert" + ] + }, + { + "cell_type": "code", + "execution_count": null, 
+ "id": "e4c2dca6", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install meds_reader==0.1.9\n", + "!pip install setuptools\n", + "!pip install cehrbert==1.3.1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "541b292a", + "metadata": {}, + "outputs": [], + "source": [ + "MIMICIV_MEDS_DIR = \"./content/meds/\"\n", + "MIMICIV_MEDS_READER_DIR = \"./content/meds_reader/\"\n", + "TASK_DIR=\"./content/tasks/\"\n", + "TASK_NAME=\"in_hospital_3d_los_after_48h\"\n", + "OUTPUT_PRETRAIN_MODEL_DIR=\"./content/output/cehrbert/\"\n", + "# TODO this variable has an identical name?\n", + "OUTPUT_PRETRAIN_MODEL_DIR=\"./content/output/cehrbert_finetuned/\"" + ] + }, + { + "cell_type": "markdown", + "id": "e2e4ac93", + "metadata": {}, + "source": [ + "Run meds_reader on the MEDS data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6010330a", + "metadata": {}, + "outputs": [], + "source": [ + "!meds_reader_convert $MIMICIV_MEDS_DIR $MIMICIV_MEDS_READER_DIR" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26f1edfb", + "metadata": {}, + "outputs": [], + "source": [ + "!mkdir -p ./content/output/cehrbert/\n", + "!mkdir -p ./content/output/cehrbert_dataset_prepared/\n", + "!mkdir -p ./content/output/cehrbert_finetuned/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17c4af8d", + "metadata": {}, + "outputs": [], + "source": [ + "!mkdir ./content/github_repo;cd ./content/github_repo;git clone https://github.com/cumc-dbmi/cehrbert.git;cd cehrbert;git checkout fix/meds_evaluation;pip install .;" + ] + }, + { + "cell_type": "markdown", + "id": "a58e10d2", + "metadata": {}, + "source": [ + "Create the cehrbert pretraining configuration yaml file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f033a64", + "metadata": {}, + "outputs": [], + "source": [ + "cehrbert_pretrain_config = \"\"\"\n", + "#Model arguments\n", + "model_name_or_path: 
\"./content/output/cehrbert/\"\n", + "tokenizer_name_or_path: \"./content/output/cehrbert/\"\n", + "num_hidden_layers: 6\n", + "max_position_embeddings: 1024\n", + "hidden_size: 768\n", + "vocab_size: 100000\n", + "min_frequency: 50\n", + "include_value_prediction: false # additional CEHR-BERT learning objective\n", + "\n", + "#Data arguments\n", + "data_folder: \"./content/meds_reader/\"\n", + "dataset_prepared_path: \"./content/output/cehrbert_dataset_prepared/\"\n", + "\n", + "# Below is a list of Med-to-CehrBert related arguments\n", + "preprocessing_num_workers: 2\n", + "preprocessing_batch_size: 128\n", + "# if is_data_in_med is false, it assumes the data is in the cehrbert format\n", + "is_data_in_meds: true\n", + "att_function_type: \"cehr_bert\"\n", + "inpatient_att_function_type: \"mix\"\n", + "include_auxiliary_token: true\n", + "include_demographic_prompt: false\n", + "# if the data is in the meds format, the validation split will be omitted\n", + "# as the meds already provide train/tuning/held_out splits\n", + "validation_split_percentage: 0.05\n", + "\n", + "# Huggingface Arguments\n", + "dataloader_num_workers: 2\n", + "dataloader_prefetch_factor: 2\n", + "\n", + "overwrite_output_dir: false\n", + "resume_from_checkpoint: # automatically infer the latest checkpoint from the output folder\n", + "seed: 42\n", + "\n", + "output_dir: \"./content/output/cehrbert/\"\n", + "evaluation_strategy: \"epoch\"\n", + "save_strategy: \"epoch\"\n", + "eval_accumulation_steps: 10\n", + "\n", + "learning_rate: 0.00005\n", + "per_device_train_batch_size: 8\n", + "per_device_eval_batch_size: 8\n", + "gradient_accumulation_steps: 2\n", + "\n", + "num_train_epochs: 50 # for large datasets, 5-10 epochs should suffice\n", + "warmup_steps: 10\n", + "weight_decay: 0.01\n", + "logging_dir: \"./logs\"\n", + "logging_steps: 10\n", + "\n", + "save_total_limit:\n", + "load_best_model_at_end: true\n", + "metric_for_best_model: \"eval_loss\"\n", + "greater_is_better: false\n", + 
"\n", + "report_to: \"none\"\n", + "\"\"\"\n", + "PRETRAIN_CONFIG_FP = f\"./content/output/cehrbert/cehrbert_pretrain_config.yaml\"\n", + "with open(PRETRAIN_CONFIG_FP, 'w') as f:\n", + " f.write(cehrbert_pretrain_config)" + ] + }, + { + "cell_type": "markdown", + "id": "20844f8a", + "metadata": {}, + "source": [ + "## Pretrain cehrbert using MLM\n", + "!python3.11 -m cehrbert.runners.hf_cehrbert_pretrain_runner ./content/output/cehrbert/cehrbert_pretrain_config.yaml" + ] + }, + { + "cell_type": "markdown", + "id": "6ce8fa79", + "metadata": {}, + "source": [ + "## Create the cehrbert finetuning configuration yaml file\n", + "cehrbert_finetune_config = f\"\"\"\n", + "Model arguments\n", + "model_name_or_path: \"./content/output/cehrbert/\"\n", + "tokenizer_name_or_path: \"./content/output/cehrbert/\"\n", + "num_hidden_layers: 6\n", + "max_position_embeddings: 1024\n", + "hidden_size: 768\n", + "vocab_size: 100000\n", + "min_frequency: 50\n", + "include_value_prediction: false # additional CEHR-BERT learning objective\n", + "\n", + "Data arguments\n", + "cohort_folder: \"./content/tasks/{TASK_NAME}/\"\n", + "data_folder: \"./content/meds_reader/\"\n", + "dataset_prepared_path: \"./content/output/cehrbert_dataset_prepared/\"\n", + "\n", + "LORA\n", + "use_lora: True\n", + "lora_rank: 64\n", + "lora_alpha: 16\n", + "target_modules: [ \"query\", \"value\" ]\n", + "lora_dropout: 0.1\n", + "\n", + "Below is a list of Med-to-CehrBert related arguments\n", + "preprocessing_num_workers: 2\n", + "preprocessing_batch_size: 128\n", + "if is_data_in_med is false, it assumes the data is in the cehrbert format\n", + "is_data_in_meds: true\n", + "att_function_type: \"cehr_bert\"\n", + "inpatient_att_function_type: \"mix\"\n", + "include_auxiliary_token: true\n", + "include_demographic_prompt: false\n", + "if the data is in the meds format, the validation split will be omitted\n", + "as the meds already provide train/tuning/held_out splits\n", + "validation_split_percentage: 
0.05\n", + "\n", + "Huggingface Arguments\n", + "dataloader_num_workers: 2\n", + "dataloader_prefetch_factor: 2\n", + "\n", + "overwrite_output_dir: false\n", + "resume_from_checkpoint: # automatically infer the latest checkpoint from the output folder\n", + "seed: 42\n", + "\n", + "output_dir: \"./content/output/cehrbert_finetuned\"\n", + "evaluation_strategy: \"epoch\"\n", + "save_strategy: \"epoch\"\n", + "eval_accumulation_steps: 10\n", + "\n", + "do_train: True\n", + "do_predict: True\n", + "\n", + "learning_rate: 0.00005\n", + "per_device_train_batch_size: 8\n", + "per_device_eval_batch_size: 8\n", + "gradient_accumulation_steps: 2\n", + "\n", + "num_train_epochs: 10\n", + "warmup_steps: 10\n", + "weight_decay: 0.01\n", + "logging_dir: \"./logs\"\n", + "logging_steps: 10\n", + "\n", + "save_total_limit:\n", + "load_best_model_at_end: true\n", + "metric_for_best_model: \"eval_loss\"\n", + "greater_is_better: false\n", + "\n", + "report_to: \"none\"\n", + "\"\"\"\n", + "FINETUNE_CONFIG_FP = f\"./content/output/cehrbert/cehrbert_finetune_config.yaml\"\n", + "with open(FINETUNE_CONFIG_FP, 'w') as f:\n", + " f.write(cehrbert_finetune_config)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf0b1d0f", + "metadata": {}, + "outputs": [], + "source": [ + "# ## Finetune cehrbert for the downstream task\n", + "!python3.11 -m cehrbert.runners.hf_cehrbert_finetune_runner ./content/output/cehrbert/cehrbert_finetune_config.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "187aeb7b", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "pd.read_parquet(\"./content/output/cehrbert_finetuned/test_predictions\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9977fc54", + "metadata": {}, + "outputs": [], + "source": [ + "!cat ./content/output/cehrbert_finetuned/test_results.json" + ] + } + ], + "metadata": { + "jupytext": { + "main_language": "python" + }, + 
"kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/demo/meds_cehrbert.py b/demo/meds_cehrbert.py deleted file mode 100644 index de1c37f..0000000 --- a/demo/meds_cehrbert.py +++ /dev/null @@ -1,397 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.16.4 -# kernelspec: -# display_name: Python 3 -# name: python3 -# --- - -# %% [Colab-only] Switch Colab to python 3.12 -# !sudo apt-get install python3.12 python3.12-venv -# import sys -# !python3.12 -m venv meds_env -# import os -# os.environ['PATH'] = '/content/meds_env/bin:' + os.environ['PATH'] -# !pip install --upgrade pip - -# # Then in a new code cell: -# import sys -# sys.executable = '/content/meds_env/bin/python' - -# # Confirm python version is 3.12 -# !python --version - -# %% [markdown] -# ## Install dependencies - -# %% -!pip -q install meds_etl==0.3.6 meds_transforms==0.0.7 - -# %% [markdown] -# # Download MIMIC-IV demo - -# %% -# macOS users should install wget (e.g. 
through brew) -!wget -q -r -N -c --no-host-directories --cut-dirs=1 -np -P ./content/download https://physionet.org/files/mimic-iv-demo/2.2/ - -# %% -# Download pre-meds script, event config (defining how raw data is converted to meds data), and meds-transform config -!mkdir -p ./content/meds-transform/ -!git clone --depth 1 https://github.com/mmcdermott/MEDS_transforms.git ./content/tmp/ -!mv ./content/tmp/MIMIC-IV_Example ./content/MIMIC-IV_Example - -# %% -# Download MIMIC IV metadata -MIMICIV_RAW_DIR = "https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map" -MIMICIV_PRE_MEDS_DIR = "./content/pre_meds/" -!mkdir {MIMICIV_PRE_MEDS_DIR} - -OUTPUT_DIR = "./content/download/mimic-iv-demo/2.2" - -files = [ - 'd_labitems_to_loinc.csv', - 'inputevents_to_rxnorm.csv', - 'lab_itemid_to_loinc.csv', - 'meas_chartevents_main.csv', - 'meas_chartevents_value.csv', - 'numerics-summary.csv', - 'outputevents_to_loinc.csv', - 'proc_datetimeevents.csv', - 'proc_itemid.csv', - 'waveforms-summary.csv' -] - -for file in files: - !wget -O {OUTPUT_DIR}/{file} {MIMICIV_RAW_DIR}/{file} - !wget -O {MIMICIV_PRE_MEDS_DIR}/{file} {MIMICIV_RAW_DIR}/{file} - -# %% -# Convert to MEDS -CURRENT_DIR = !pwd -CURRENT_DIR = CURRENT_DIR[0] - -# %% -# Convert to MEDS -TUTORIAL_DIR = CURRENT_DIR + "/content/MIMIC-IV_Example" -MIMICIV_RAW_DIR = CURRENT_DIR + "/content/download/mimic-iv-demo/2.2" -MIMICIV_PRE_MEDS_DIR = CURRENT_DIR + "/content/pre_meds" -MIMICIV_MEDS_DIR = CURRENT_DIR + "/content/meds" - -EVENT_CONVERSION_CONFIG_FP = CURRENT_DIR + "/content/MIMIC-IV_Example/configs/event_config.yaml" -PIPELINE_CONFIG_PATH = CURRENT_DIR + "/content/MIMIC-IV_Example/configs/pipeline_config.yaml" -!cd {TUTORIAL_DIR} && ./run.sh {MIMICIV_RAW_DIR} {MIMICIV_PRE_MEDS_DIR} {MIMICIV_MEDS_DIR} do_unzip=true - -# %% [markdown] -# # Examine MEDS data - -# %% -import polars as pl - -data = pl.read_parquet('./content/meds/data/**/*.parquet') -data[['subject_id', 'time', 'code', 
'numeric_value']] - -# %% [markdown] -# # A simple Polars analysis - -# %% -icd10_events = data.filter(pl.col('code').str.starts_with('DIAGNOSIS//ICD//10//')) -icd10_events.group_by('code').count().sort('count', descending=True) - -# %% -df = pl.read_parquet("./content/meds/metadata/codes.parquet") -df - -# %% [markdown] -# ## Using an example MEDS tool, ACES for labeling - -# %% [markdown] -# ## Install ACES - -# %% -!pip install es-aces - -# %% - -# From ACES documentation -task_config = """ -description: >- - This file specifies the base configuration for the prediction of a hospital los being greater than 3days, - leveraging only the first 48 hours of data after admission, with a 24 hour gap between the input window - and the target window. Patients who die or are discharged in the gap window are excluded. Note that this - task is in-**hospital** los, not in-**ICU** los which is a different task. - -predicates: - hospital_admission: - code: {regex: "HOSPITAL_ADMISSION//.*"} - hospital_discharge: - code: {regex: "HOSPITAL_DISCHARGE//.*"} - death: - code: MEDS_DEATH - discharge_or_death: - expr: or(hospital_discharge, death) - -trigger: hospital_admission - -windows: - input: - start: NULL - end: trigger + 48h - start_inclusive: True - end_inclusive: True - index_timestamp: end - gap: - start: input.end - end: start + 24h - start_inclusive: False - end_inclusive: True - has: - hospital_admission: (None, 0) - discharge_or_death: (None, 0) - target: - start: trigger - end: start + 3d - start_inclusive: False - end_inclusive: True - label: discharge_or_death -""" - -!mkdir ./content/tasks/ -p -TASK_NAME = "in_hospital_3d_los_after_48h" -TASK_CONFIG_FP = f"./content/tasks/{TASK_NAME}.yaml" -with open(TASK_CONFIG_FP, 'w') as f: - f.write(task_config) - - -# %% -!pip install es-aces - -# %% -!echo $TASK_NAME -!echo $TASK_CONFIG_FP - -# %% -!aces-cli --multirun data=sharded data.standard=meds data.root="$MIMICIV_MEDS_DIR/data" "data.shard=$(expand_shards 
./content/meds/data/)" cohort_dir=" ./content/tasks" cohort_name="$TASK_NAME" config_path="$TASK_CONFIG_FP" - -# %% -# TODO: reimporting polars due to dependencies? -import polars as pl - -# Execute query and get results -df = pl.read_parquet(f"./content/tasks/{TASK_NAME}/**/*.parquet") - -print("train prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/train/*.parquet")['boolean_value'].mean(), 3))) -print("tuning prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/tuning/*.parquet")['boolean_value'].mean(), 3))) -print("held_out prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/held_out/*.parquet")['boolean_value'].mean(), 3))) - - -df.sort('boolean_value') - -# %% [markdown] -# ## Switch Colab to python 3.11 for cehrbert -# %% -# %%capture -# !sudo apt-get install python3.11 python3.11-venv -# import sys -# !python3.11 -m venv cehrbert -# import os -# os.environ['PATH'] = './content/cehrbert/bin:' + os.environ['PATH'] -# !pip install --upgrade pip - -# %% -# import sys -# sys.executable = './content/cehrbert/bin/python' - -# %% [markdown] -# ## Install cehrbert and its dependencies - -# %% -!pip install meds_reader==0.1.9 -!pip install setuptools -!pip install cehrbert==1.3.1 - -# %% -MIMICIV_MEDS_DIR = "./content/meds/" -MIMICIV_MEDS_READER_DIR = "./content/meds_reader/" -TASK_DIR="./content/tasks/" -TASK_NAME="in_hospital_3d_los_after_48h" -OUTPUT_PRETRAIN_MODEL_DIR="./content/output/cehrbert/" -# TODO this variable has an identical name? 
-OUTPUT_PRETRAIN_MODEL_DIR="./content/output/cehrbert_finetuned/" - -# %% [markdown] -# Run meds_reader on the MEDS data - -# %% -!meds_reader_convert $MIMICIV_MEDS_DIR $MIMICIV_MEDS_READER_DIR - -# %% -!mkdir -p ./content/output/cehrbert/ -!mkdir -p ./content/output/cehrbert_dataset_prepared/ -!mkdir -p ./content/output/cehrbert_finetuned/ - -# %% -# !mkdir ./content/github_repo;cd ./content/github_repo;git clone https://github.com/cumc-dbmi/cehrbert.git;cd cehrbert;git checkout fix/meds_evaluation;pip install .; - -# %% [markdown] -# Create the cehrbert pretraining configuration yaml file - -# %% -cehrbert_pretrain_config = """ -#Model arguments -model_name_or_path: "./content/output/cehrbert/" -tokenizer_name_or_path: "./content/output/cehrbert/" -num_hidden_layers: 6 -max_position_embeddings: 1024 -hidden_size: 768 -vocab_size: 100000 -min_frequency: 50 -include_value_prediction: false # additional CEHR-BERT learning objective - -#Data arguments -data_folder: "./content/meds_reader/" -dataset_prepared_path: "./content/output/cehrbert_dataset_prepared/" - -# Below is a list of Med-to-CehrBert related arguments -preprocessing_num_workers: 2 -preprocessing_batch_size: 128 -# if is_data_in_med is false, it assumes the data is in the cehrbert format -is_data_in_meds: true -att_function_type: "cehr_bert" -inpatient_att_function_type: "mix" -include_auxiliary_token: true -include_demographic_prompt: false -# if the data is in the meds format, the validation split will be omitted -# as the meds already provide train/tuning/held_out splits -validation_split_percentage: 0.05 - -# Huggingface Arguments -dataloader_num_workers: 2 -dataloader_prefetch_factor: 2 - -overwrite_output_dir: false -resume_from_checkpoint: # automatically infer the latest checkpoint from the output folder -seed: 42 - -output_dir: "./content/output/cehrbert/" -evaluation_strategy: "epoch" -save_strategy: "epoch" -eval_accumulation_steps: 10 - -learning_rate: 0.00005 -per_device_train_batch_size: 8 
-per_device_eval_batch_size: 8 -gradient_accumulation_steps: 2 - -num_train_epochs: 50 # for large datasets, 5-10 epochs should suffice -warmup_steps: 10 -weight_decay: 0.01 -logging_dir: "./logs" -logging_steps: 10 - -save_total_limit: -load_best_model_at_end: true -metric_for_best_model: "eval_loss" -greater_is_better: false - -report_to: "none" -""" -PRETRAIN_CONFIG_FP = f"./content/output/cehrbert/cehrbert_pretrain_config.yaml" -with open(PRETRAIN_CONFIG_FP, 'w') as f: - f.write(cehrbert_pretrain_config) - -# %% [markdown] -# ## Pretrain cehrbert using MLM -!python3.11 -m cehrbert.runners.hf_cehrbert_pretrain_runner ./content/output/cehrbert/cehrbert_pretrain_config.yaml - -# %% [markdown] -# ## Create the cehrbert finetuning configuration yaml file -cehrbert_finetune_config = f""" -#Model arguments -model_name_or_path: "./content/output/cehrbert/" -tokenizer_name_or_path: "./content/output/cehrbert/" -num_hidden_layers: 6 -max_position_embeddings: 1024 -hidden_size: 768 -vocab_size: 100000 -min_frequency: 50 -include_value_prediction: false # additional CEHR-BERT learning objective - -#Data arguments -cohort_folder: "./content/tasks/{TASK_NAME}/" -data_folder: "./content/meds_reader/" -dataset_prepared_path: "./content/output/cehrbert_dataset_prepared/" - -#LORA -use_lora: True -lora_rank: 64 -lora_alpha: 16 -target_modules: [ "query", "value" ] -lora_dropout: 0.1 - -# Below is a list of Med-to-CehrBert related arguments -preprocessing_num_workers: 2 -preprocessing_batch_size: 128 -# if is_data_in_med is false, it assumes the data is in the cehrbert format -is_data_in_meds: true -att_function_type: "cehr_bert" -inpatient_att_function_type: "mix" -include_auxiliary_token: true -include_demographic_prompt: false -# if the data is in the meds format, the validation split will be omitted -# as the meds already provide train/tuning/held_out splits -validation_split_percentage: 0.05 - -# Huggingface Arguments -dataloader_num_workers: 2 -dataloader_prefetch_factor: 2 
- -overwrite_output_dir: false -resume_from_checkpoint: # automatically infer the latest checkpoint from the output folder -seed: 42 - -output_dir: "./content/output/cehrbert_finetuned" -evaluation_strategy: "epoch" -save_strategy: "epoch" -eval_accumulation_steps: 10 - -do_train: True -do_predict: True - -learning_rate: 0.00005 -per_device_train_batch_size: 8 -per_device_eval_batch_size: 8 -gradient_accumulation_steps: 2 - -num_train_epochs: 10 -warmup_steps: 10 -weight_decay: 0.01 -logging_dir: "./logs" -logging_steps: 10 - -save_total_limit: -load_best_model_at_end: true -metric_for_best_model: "eval_loss" -greater_is_better: false - -report_to: "none" -""" -FINETUNE_CONFIG_FP = f"./content/output/cehrbert/cehrbert_finetune_config.yaml" -with open(FINETUNE_CONFIG_FP, 'w') as f: - f.write(cehrbert_finetune_config) - -# %% -# ## Finetune cehrbert for the downstream task -!python3.11 -m cehrbert.runners.hf_cehrbert_finetune_runner ./content/output/cehrbert/cehrbert_finetune_config.yaml - -# %% -import pandas as pd - -pd.read_parquet("./content/output/cehrbert_finetuned/test_predictions") - -# %% -!cat ./content/output/cehrbert_finetuned/test_results.json diff --git a/demo/meds_tab.ipynb b/demo/meds_tab.ipynb index 20de31f..4ed2e0b 100644 --- a/demo/meds_tab.ipynb +++ b/demo/meds_tab.ipynb @@ -1,73 +1,8 @@ { "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "PZmYRVX2W8m7" - }, - "source": [ - "# Using an example MEDS tool, ACES for labeling" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "es-39eHOyp5a", - "outputId": "9d5e2468-fdd5-4c4b-8615-fe24f5a9310f" - }, - "outputs": [], - "source": [ - "!pip install es-aces" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "bXLiJGEry-Gb", - "outputId": "7d954ab4-cf5c-4d02-a99c-669b5822bf44" - }, - "outputs": [], - 
"source": [ - "!aces-cli --multirun data=sharded data.standard=meds data.root=\"$MIMICIV_MEDS_DIR/data\" \"data.shard=$(expand_shards /content/meds/data/)\" cohort_dir=\" /content/tasks\" cohort_name=\"$TASK_NAME\" config_path=\"$TASK_CONFIG_FP\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 497 - }, - "id": "7Vvac7DIWyRT", - "outputId": "40493f0e-48ba-4f5e-9d9a-401e26f1a9b7" - }, - "outputs": [], - "source": [ - "import polars as pl\n", - "\n", - "# execute query and get results\n", - "df = pl.read_parquet(f\"/content/tasks/{TASK_NAME}/**/*.parquet\")\n", - "\n", - "print(\"train prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/train/*.parquet\")['boolean_value'].mean(), 3)))\n", - "print(\"tuning prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/tuning/*.parquet\")['boolean_value'].mean(), 3)))\n", - "print(\"held_out prevalence: \" + str(round(pl.read_parquet(f\"/content/tasks/{TASK_NAME}/held_out/*.parquet\")['boolean_value'].mean(), 3)))\n", - "\n", - "\n", - "df.sort('boolean_value')" - ] - }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -76,32 +11,97 @@ "id": "XWB7O1UGhRIo", "outputId": "e3416d5e-7427-4cf4-c0ab-20053a9d3430" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: meds-tab in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (0.0.7)\n", + "Requirement already satisfied: polars<=1.17.1,>=1.6.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (1.17.1)\n", + "Requirement already satisfied: pyarrow in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (17.0.0)\n", + "Requirement already satisfied: loguru in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages 
(from meds-tab) (0.7.3)\n", + "Requirement already satisfied: hydra-core==1.3.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (1.3.2)\n", + "Requirement already satisfied: numpy in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (2.2.0)\n", + "Requirement already satisfied: scipy<1.14.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (1.13.1)\n", + "Requirement already satisfied: pandas in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (2.2.3)\n", + "Requirement already satisfied: tqdm in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (4.67.1)\n", + "Requirement already satisfied: xgboost in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (2.1.3)\n", + "Requirement already satisfied: scikit-learn in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (1.6.0)\n", + "Requirement already satisfied: hydra-optuna-sweeper in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (1.2.0)\n", + "Requirement already satisfied: hydra-joblib-launcher in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (1.2.0)\n", + "Requirement already satisfied: ml-mixins in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (0.1.0)\n", + "Requirement already satisfied: meds>=0.3.3 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (0.3.3)\n", + "Requirement already satisfied: meds-transforms>=0.0.7 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (0.0.9)\n", + "Requirement already satisfied: omegaconf<2.4,>=2.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-core==1.3.2->meds-tab) (2.3.0)\n", + "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from 
hydra-core==1.3.2->meds-tab) (4.9.3)\n", + "Requirement already satisfied: packaging in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-core==1.3.2->meds-tab) (24.2)\n", + "Requirement already satisfied: jsonschema>=4.0.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds>=0.3.3->meds-tab) (4.23.0)\n", + "Requirement already satisfied: typing-extensions>=4.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds>=0.3.3->meds-tab) (4.12.2)\n", + "Requirement already satisfied: nested_ragged_tensors>=0.0.8 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-transforms>=0.0.7->meds-tab) (0.1)\n", + "Requirement already satisfied: joblib>=0.14.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-joblib-launcher->meds-tab) (1.4.2)\n", + "Requirement already satisfied: optuna<3.0.0,>=2.10.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-optuna-sweeper->meds-tab) (2.10.1)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from pandas->meds-tab) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from pandas->meds-tab) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from pandas->meds-tab) (2024.2)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from scikit-learn->meds-tab) (3.5.0)\n", + "Requirement already satisfied: attrs>=22.2.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds>=0.3.3->meds-tab) (24.2.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds>=0.3.3->meds-tab) 
(2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds>=0.3.3->meds-tab) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds>=0.3.3->meds-tab) (0.22.3)\n", + "Requirement already satisfied: safetensors in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from nested_ragged_tensors>=0.0.8->meds-transforms>=0.0.7->meds-tab) (0.4.5)\n", + "Requirement already satisfied: PyYAML>=5.1.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from omegaconf<2.4,>=2.2->hydra-core==1.3.2->meds-tab) (6.0.2)\n", + "Requirement already satisfied: alembic in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (1.14.0)\n", + "Requirement already satisfied: cliff in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (4.8.0)\n", + "Requirement already satisfied: cmaes>=0.8.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (0.11.1)\n", + "Requirement already satisfied: colorlog in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (6.9.0)\n", + "Requirement already satisfied: sqlalchemy>=1.1.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (2.0.36)\n", + "Requirement already satisfied: six>=1.5 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->meds-tab) (1.17.0)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from 
sqlalchemy>=1.1.0->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (3.1.1)\n", + "Requirement already satisfied: Mako in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from alembic->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (1.3.8)\n", + "Requirement already satisfied: PrettyTable>=0.7.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (3.12.0)\n", + "Requirement already satisfied: autopage>=0.4.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (0.5.2)\n", + "Requirement already satisfied: cmd2>=1.0.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (2.5.7)\n", + "Requirement already satisfied: stevedore>=2.0.1 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (5.4.0)\n", + "Requirement already satisfied: gnureadline in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cmd2>=1.0.0->cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (8.2.13)\n", + "Requirement already satisfied: pyperclip in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cmd2>=1.0.0->cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (1.9.0)\n", + "Requirement already satisfied: wcwidth in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cmd2>=1.0.0->cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (0.2.13)\n", + "Requirement already satisfied: pbr>=2.0.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from stevedore>=2.0.1->cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (6.1.0)\n", + "Requirement already satisfied: MarkupSafe>=0.9.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from 
Mako->alembic->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (3.0.2)\n" + ] + } + ], "source": [ - "#@title Install meds-tab\n", - "\n", - "!pip uninstall es-aces -y\n", "!pip install meds-tab" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "id": "SeGawIqli0nn" }, "outputs": [], "source": [ - "MIMICIV_MEDS_DIR = \"/content/meds/\"\n", - "OUTPUT_TABULARIZATION_DIR=\"/content/tabularized/\"\n", - "TASK_DIR=\"/content/tasks/\"\n", - "TASK_NAME=\"in_hospital_3d_los_after_48h\"\n", - "OUTPUT_MODEL_DIR=\"/content/output/meds_tab/\"" + "#@title Download E-ICU demo\n", + "import os\n", + "from pathlib import Path\n", + "notebook_dir = os.getcwd()\n", + "\n", + "ROOT_DIR=f\"{notebook_dir}/work_dir/mimiciv_demo/\"\n", + "# ROOT_DIR=f\"{notebook_dir}/work_dir/eicu_demo/\"\n", + "Path(ROOT_DIR).mkdir(parents=True, exist_ok=True)\n", + "\n", + "MEDS_DIR = ROOT_DIR + \"/meds/\"\n", + "OUTPUT_TABULARIZATION_DIR= ROOT_DIR + \"/tabularized/\"\n", + "TASK_DIR = MEDS_DIR + \"/task_labels\"\n", + "TASK_NAME=\"mortality/in_icu/first_24h\"\n", + "# TASK_NAME=\"los_in_hospital_first_48h\"\n", + "OUTPUT_MODEL_DIR= ROOT_DIR + \"/output/meds_tab/\"" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -109,14 +109,84 @@ "id": "Tud0_0cgjljP", "outputId": "fb3417e0-3ba4-4f9a-ab95-ce3ba8731ca1" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-12-14 19:28:24.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning describe_codes with the following configuration:\n", + "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + 
"seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "output_filepath: ${output_dir}/metadata/codes.parquet\n", + "name: describe_codes\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.646\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "output_filepath: ${output_dir}/metadata/codes.parquet\n", + "name: describe_codes\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.describe_codes\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m41\u001b[0m - \u001b[1mIterating through shards and caching feature frequencies.\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:28:24.651936. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:34: DataOrientationWarning: Row orientation inferred during DataFrame construction. Explicitly specify the orientation by passing `orient=\"row\"` to silence this warning.\n", + " return pl.DataFrame([[col, freq] for col, freq in freq_dict.items()], schema=[\"code\", \"count\"])\n", + "\u001b[32m2024-12-14 19:28:24.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.052346\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.938\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.286837\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.938\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/train/.0_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:28:24.939927. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.018227\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.020954\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/tuning/.0_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.961\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:28:24.961781. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.011915\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.013785\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/held_out/.0_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.describe_codes\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m66\u001b[0m - \u001b[1mSumming frequency computations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:28:24.976571. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. 
Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if not df.columns == [\"code\", \"count\"]:\n", + "\u001b[32m2024-12-14 19:28:24.992\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.015360\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.992\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.018957\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/.codes_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:28:24.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.describe_codes\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m94\u001b[0m - \u001b[1mStored feature columns and frequencies.\u001b[0m\n" + ] + } + ], "source": [ - "!meds-tab-describe input_dir={MIMICIV_MEDS_DIR}/data output_dir={OUTPUT_TABULARIZATION_DIR}" + "!meds-tab-describe input_dir={MEDS_DIR}/data output_dir={OUTPUT_TABULARIZATION_DIR}" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "id": "RVLBdOn1mnV5" }, @@ -129,18 +199,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "id": "KhCPqBmduNYK" }, "outputs": [], "source": [ - "!rm -rf /content/tabularized/tabularize/" 
+ "# !rm -rf /content/tabularized/tabularize/" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -148,14 +218,122 @@ "id": "p_D07KzxjVUl", "outputId": "8836b076-cf64-4f29-da81-ac5125ab7608" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-12-14 19:29:06.167\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning tabularize_static with the following configuration:\n", + "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "input_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + "output_tabularized_dir: ${output_dir}/tabularize\n", + "name: tabularization\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.169\u001b[0m | 
\u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "input_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + "output_tabularized_dir: ${output_dir}/tabularize\n", + "name: tabularization\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - tabularization.filtered_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.173\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet exists; reading directly and returning.\u001b[0m\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if not df.columns == [\"code\", \"count\"]:\n", + "\u001b[32m2024-12-14 19:29:06.191\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:06.190860. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.367\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_static_features\u001b[0m:\u001b[36mget_sparse_static_rep\u001b[0m:\u001b[36m69\u001b[0m - \u001b[1mMake static data sparse and merge it with the time-series data\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.092494\u001b[0m\n", + 
"\u001b[32m2024-12-14 19:29:06.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/none/static/present.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.462\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.271271\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.462\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/none/static/.present_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.463\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:06.463428. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.463\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_static_features\u001b[0m:\u001b[36mget_sparse_static_rep\u001b[0m:\u001b[36m69\u001b[0m - \u001b[1mMake static data sparse and merge it with the time-series data\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.096838\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/none/static/present.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.699\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.236471\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.699\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/none/static/.present_cache\u001b[0m\n", + 
"\u001b[32m2024-12-14 19:29:06.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:06.700988. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:06.859\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_static_features\u001b[0m:\u001b[36mget_sparse_static_rep\u001b[0m:\u001b[36m69\u001b[0m - \u001b[1mMake static data sparse and merge it with the time-series data\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:07.036\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.199972\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:07.036\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/none/static/present.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:07.037\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.336466\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:07.037\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/none/static/.present_cache\u001b[0m\n" + ] + } + ], "source": [ - "!meds-tab-tabularize-static \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS" + "!meds-tab-tabularize-static \"input_dir=$MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" do_overwrite=False \"$WINDOW_SIZES\" \"$AGGREGATIONS\"" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -163,20 +341,1506 @@ "id": "u-e-mV2Hk-Qf", "outputId": "c292be12-ff74-44e4-f039-758e10ccc909" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-12-14 19:29:41,824][HYDRA] Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 2 jobs\n", + "[2024-12-14 19:29:41,824][HYDRA] Launching jobs, sweep output dir : /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.logs\n", + "[2024-12-14 19:29:41,824][HYDRA] \t#0 : worker=0 input_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]\n", + "[2024-12-14 19:29:41,824][HYDRA] \t#1 : worker=1 input_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ do_overwrite=False 
tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/joblib/externals/loky/backend/fork_exec.py:38: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "In addition, using fork() with Python in general is a recipe for mysterious\n", + "deadlocks and crashes.\n", + "\n", + "The most likely reason you are seeing this error is because you are using the\n", + "multiprocessing module on Linux, which uses fork() by default. This will be\n", + "fixed in Python 3.14. Until then, you want to use the \"spawn\" context instead.\n", + "\n", + "See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.\n", + "\n", + "If you really know what your doing, you can silence this warning with the warning module\n", + "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", + "\n", + " pid = os.fork()\n", + "\u001b[32m2024-12-14 19:29:44.342\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main function in __main__ module. 
Using sys.argv[0] as a fallback.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:44.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-tabularize-time-series with the following configuration:\n", + "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "input_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + "output_tabularized_dir: ${output_dir}/tabularize\n", + "name: tabularization\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:44.343\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main function in __main__ module. 
Using sys.argv[0] as a fallback.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:44.344\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-tabularize-time-series with the following configuration:\n", + "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 1\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "input_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + "output_tabularized_dir: ${output_dir}/tabularize\n", + "name: tabularization\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:44.345\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "input_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + "output_tabularized_dir: ${output_dir}/tabularize\n", + "name: tabularization\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - tabularization.filtered_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:44.346\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 1\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "input_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + "output_tabularized_dir: ${output_dir}/tabularize\n", + "name: tabularization\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - tabularization.filtered_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. 
Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if not df.columns == [\"code\", \"count\"]:\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if not df.columns == [\"code\", \"count\"]:\n", + "\u001b[32m2024-12-14 19:29:44.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:44.368445. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:44.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:44.368478. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:44.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:44.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:44.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:44.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:44.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:44.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:44.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 
19:29:44.680\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:44.770\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:44.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:46.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:46.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:47.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:47.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.290497\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:47.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:47.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.451847\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:47.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:47.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:47.821471. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:47.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:47.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:47.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:47.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating 
Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:47.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:48.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:48.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:49.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:49.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:50.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:50.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.691439\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:50.657\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:50.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.851850\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:50.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:50.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:50.674418. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:50.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:50.813\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:50.818\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:51.035\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:51.063\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:51.319\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:51.319\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:06.808234\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:51.319\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:51.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:06.967497\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:51.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:51.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:51.337319. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:51.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:51.475\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:51.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:51.539\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:51.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:51.892\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:51.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:52.264\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:52.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.789179\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:52.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:52.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.940558\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:52.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:52.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:52.279073. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:52.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:52.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:52.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:52.538\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:52.557\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:53.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:53.781\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.038\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.619791\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.786003\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered 
lock at 2024-12-14 19:29:55.066239. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.205\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.266\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.618\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.621\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 
19:29:55.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.793007\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:55.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:56.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.947830\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:56.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:56.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:56.015070. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:56.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:56.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:56.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:56.221\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:56.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:57.244\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:57.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:58.298\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:58.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.144824\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:58.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:58.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.298852\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:58.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:58.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:58.315084. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:58.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:58.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:58.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:58.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:58.680\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:59.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:59.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:08.286706\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:59.100\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:59.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.438738\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:59.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:59.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:59.114326. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:59.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:59.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:59.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:59.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:29:59.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:00.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:00.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:01.809\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:01.815\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:02.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:02.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.043633\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:02.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:02.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.196022\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:02.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:02.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - 
\u001b[1mRegistered lock at 2024-12-14 19:30:02.311480. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:02.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:02.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:02.456\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:02.678\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:02.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:04.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:04.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:06.518336\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:04.972\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:04.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:06.672445\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:04.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:04.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:04.988614. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:04.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:05.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:05.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:05.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:05.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:06.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:06.724\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:06.738\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:06.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:08.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:08.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.973982\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:08.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:08.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.125978\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:08.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:08.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered 
lock at 2024-12-14 19:30:08.115686. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:08.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:08.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:08.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:08.380\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:08.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:09.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:09.054\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 
19:30:09.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:09.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.484012\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:09.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:09.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.639095\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:09.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:09.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:09.755883. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:09.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:09.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:09.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:09.973\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 1d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:09.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:10.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:10.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:10.760\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:10.761\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:08.309484\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:10.761\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:10.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.463632\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:10.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:10.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:10.776228. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:10.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:10.916\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:10.921\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:10.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:10.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:11.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:11.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.299031\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:11.194\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:11.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.538430\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:11.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:11.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:11.295472. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:11.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:11.433\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:11.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:11.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:11.679\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:11.797\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:11.799\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:12.651\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:12.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.734256\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:12.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:12.666\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.889755\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:12.666\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:12.667\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:12.667033. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:12.667\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:12.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:12.813\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:12.918\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:12.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:14.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:14.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:14.851\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:14.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:16.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:16.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:04.017321\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:16.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:16.842\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.175854\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:16.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:16.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered 
lock at 2024-12-14 19:30:16.844065. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:16.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:16.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:16.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:17.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:17.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:17.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:17.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 
19:30:18.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:06.703274\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:06.855055\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:18.151696. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.423637\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.581304\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.425\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:18.426646. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.552\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 365d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.589\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.627\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:18.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:19.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:19.446\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:20.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:20.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.732474\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:20.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:20.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 
0:00:01.888907\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:20.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:20.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:20.316486. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:20.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:20.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:20.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:20.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:20.716\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and 
aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:20.717\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:20.729\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:22.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:22.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:23.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:23.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.002779\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:23.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:23.500\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.183946\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:23.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:23.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:23.501441. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:23.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:23.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:23.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:23.712\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:23.720\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:24.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:24.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:25.536\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:25.536\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:07.246122\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:25.538\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:25.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:25.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.122564\u001b[0m\n", + "\u001b[32m2024-12-14 
19:30:25.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:25.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.280840\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:25.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:25.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:25.783527. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:25.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:25.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:25.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:26.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:26.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:26.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.353698\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:26.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 
19:30:26.506\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:26.506679. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:26.507\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:26.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:26.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:26.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:26.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:26.726\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:26.732\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:27.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:27.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.478692\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:27.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:27.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.636762\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:27.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:27.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:27.421440. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:27.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:27.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:27.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:27.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:27.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:27.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:27.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:28.297\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:28.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:28.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:28.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.122697\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:28.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:28.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.281859\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:28.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:28.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered 
lock at 2024-12-14 19:30:28.789747. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:28.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:28.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:28.934\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:28.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:28.992\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.430692\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:28.992\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:29.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.585463\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:29.007\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:29.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:29.007931. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:29.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:29.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:29.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:29.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 1d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:29.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:29.223\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:29.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:30.040\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:30.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:30.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:30.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.715615\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:30.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:30.883\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.875855\u001b[0m\n", 
+ "\u001b[32m2024-12-14 19:30:30.883\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:30.884\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:30.884794. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:30.885\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:31.024\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:31.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:31.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:31.141\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + 
"\u001b[32m2024-12-14 19:30:31.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:31.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:33.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:33.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:33.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:33.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:05.059885\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:33.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:34.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m 
- \u001b[1mSucceeded in 0:00:05.608663\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:34.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:34.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:34.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:34.400358. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:34.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:34.539\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:34.544\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:34.765\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:34.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:35.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:35.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:04.018792\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:35.044\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:35.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.175562\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:35.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:35.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - 
\u001b[1mRegistered lock at 2024-12-14 19:30:35.061625. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:35.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:35.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:35.205\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:35.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 30d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:35.283\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:35.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:35.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + 
"\u001b[32m2024-12-14 19:30:36.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:36.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.557473\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:36.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:36.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.896954\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:36.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:36.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:36.959721. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:36.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:37.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:37.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:37.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:37.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:37.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:37.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:38.139\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:38.142\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.052899\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.199030\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - 
\u001b[1mRegistered lock at 2024-12-14 19:30:39.159826. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.362\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.709\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.711\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + 
"\u001b[32m2024-12-14 19:30:39.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:05.313204\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:05.466418\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.867\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:39.868178. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:39.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:40.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:40.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:40.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:40.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.772471\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:40.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:40.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.920812\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:40.080\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:40.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:40.081722. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:40.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:40.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:40.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:40.219\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:40.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:40.449\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:40.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:42.034\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:42.037\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:43.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:43.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:43.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:43.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.945570\u001b[0m\n", + 
"\u001b[32m2024-12-14 19:30:43.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:43.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.096982\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:43.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:43.966\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:43.966\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:43.966572. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:43.966\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:44.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:44.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:44.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:44.232\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:45.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:45.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:45.759\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:05.539699\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:45.759\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:45.762\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:45.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:05.693115\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:45.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:45.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:45.775948. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:45.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:45.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:45.920\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:46.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:46.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:47.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:47.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.236261\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:47.342\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:47.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.390809\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:47.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:47.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:47.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:47.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:47.359\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:47.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:47.359625. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:47.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:47.499\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:47.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:47.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:47.741\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:50.127\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:50.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:51.727\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:51.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:54.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:54.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:08.350187\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:54.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:54.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.503259\u001b[0m\n", + 
"\u001b[32m2024-12-14 19:30:54.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:54.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:54.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:54.280636. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:54.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:54.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:54.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:54.537\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating 
aggregation value/sum_sqd for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:54.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:55.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:55.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:08.239307\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:55.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:55.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.395853\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:55.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:55.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/max.npz exists; reading directly 
and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:55.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/max.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:55.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m223\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum_sqd.npz is in progress as of 2024-12-14 19:30:54.280636. Returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:55.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:55.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:55.758022. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:55.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:55.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:55.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:55.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:55.967\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:56.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:56.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:56.986\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:56.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:57.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:57.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.251303\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:57.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:57.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.407902\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:57.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:57.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - 
\u001b[1mRegistered lock at 2024-12-14 19:30:57.689601. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:57.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:57.830\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:57.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:57.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:57.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:58.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:58.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.151356\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:58.050\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:58.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.311068\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:58.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:58.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:58.070419. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:58.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:58.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:58.228\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:58.365\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 30d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:58.379\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:59.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:59.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:59.952\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:30:59.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.010203\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.408739\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m223\u001b[0m - 
\u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/max.npz is in progress as of 2024-12-14 19:30:57.689601. Returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/code/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.482\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:01.482507. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - 
\u001b[1mCompleted step 0 in 0:00:04.074945\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.918\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.228736\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.918\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.919\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:01.919543. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:01.919\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:02.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:02.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:02.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:02.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:02.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:02.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:02.846\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:02.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.449459\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.600210\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - 
\u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/max.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:03.084505. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 1d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.526\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.465358\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.544\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.624964\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.544\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:03.545555. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.799\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:03.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:04.415\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:04.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:05.105\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:05.108\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:05.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:05.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.459236\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:05.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:05.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.750706\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:05.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:05.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered 
lock at 2024-12-14 19:31:05.836372. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:05.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:05.973\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:05.978\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.671017\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.358\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.825396\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/code/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.372\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.372\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.372\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/code/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:06.373154. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.509\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:06.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:08.046\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:08.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:09.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:09.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:10.026\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:10.026\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:04.052935\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:10.026\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:10.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.204687\u001b[0m\n", + 
"\u001b[32m2024-12-14 19:31:10.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:10.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:10.042142. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:10.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:10.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:10.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:10.381\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:10.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 
19:31:12.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:12.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:05.578928\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:12.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:12.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:05.727701\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:12.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:12.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:12.102019. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:12.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:12.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:12.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:12.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:12.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:13.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:13.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:14.333\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:14.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.096215\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:14.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:14.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:14.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:14.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.251713\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:14.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:14.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - 
\u001b[1mRegistered lock at 2024-12-14 19:31:14.354885. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:14.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:14.534\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:14.538\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:14.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:14.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:16.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:16.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + 
"\u001b[32m2024-12-14 19:31:18.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:08.373020\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.581\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.539738\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.583\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:18.583144. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.583\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:04.220569\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.419170\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.774\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/code/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/max.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/count.npz exists; reading directly and 
returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.777\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/code/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.777\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.777\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/max.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.779\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:18.778926. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.779\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.815\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 365d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.921\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:18.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:19.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:19.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 
19:31:19.380\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:19.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:20.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:20.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.609524\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:20.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:20.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.963247\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:20.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:20.547\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m223\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/min.npz is in progress as of 2024-12-14 19:31:18.778926. Returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:20.548\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:20.548180. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:20.548\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:20.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:20.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:20.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 30d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:20.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:23.062\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:23.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:23.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:23.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:06.312096\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot 
file\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:08.342520\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.500363\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:27.280528. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 365d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:07.141059\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 
19:31:27.690\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/max.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.690\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m223\u001b[0m - 
\u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/code/count.npz is in progress as of 2024-12-14 19:31:27.280528. Returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:27.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:28.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:28.590\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:30.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:30.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.148404\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:30.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:31.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.766112\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:31.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:31.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/code/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:31.048\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:31.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/code/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:31.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:31.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:31.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:31.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/code/count.npz exists; reading directly and 
returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:31.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/max.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:31.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:31.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/max.npz exists; reading directly and returning.\u001b[0m\n" + ] + } + ], "source": [ - "!meds-tab-tabularize-time-series --multirun \"worker=range(0,2)\" \"hydra/launcher=joblib\" \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS" + "!meds-tab-tabularize-time-series --multirun \"worker=range(0,2)\" \"hydra/launcher=joblib\" \"input_dir=$MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" do_overwrite=False \"$WINDOW_SIZES\" \"$AGGREGATIONS\"" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": { "id": "NmaR_-Fik4eH" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-12-14 19:31:32.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning cache_task with the following configuration:\n", + 
"input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_dir: ${output_dir}/tabularize\n", + "input_label_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//task_labels/mortality/in_icu/first_24h/\n", + "output_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "output_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "label_column: boolean_value\n", + "name: task_specific_caching\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:32.693\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_dir: ${output_dir}/tabularize\n", + "input_label_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//task_labels/mortality/in_icu/first_24h/\n", + "output_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "output_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "label_column: boolean_value\n", + "name: task_specific_caching\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - input_label_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h\n", + " - input_tabularized_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize\n", + " - output_dir: ✅ 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", + " - tabularization.filtered_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if not df.columns == [\"code\", \"count\"]:\n", + "\u001b[32m2024-12-14 19:31:32.718\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:32.718444. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:32.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:32.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mExtracting labels for /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_tabular_automl/scripts/cache_task.py:138: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. 
Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if \"numeric_value\" not in pl.scan_parquet(meds_data_fp).columns:\n", + "\u001b[32m2024-12-14 19:31:33.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.004008\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.403557\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.123828. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mExtracting labels for /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000829\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.189162\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.313\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.314377. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000467\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.315\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001786\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.317717. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mExtracting labels for /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000750\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.281862\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.599\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.600910. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000506\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.602\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001858\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.604087. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.605\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000514\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.605\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.605\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001834\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.605\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.607287. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000479\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.608\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001616\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.610091. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/none/static/present.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.611\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000564\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/none/static/present.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002447\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.612\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/none/static/.present_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.613706. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.092937\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.896\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.283911\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.899028. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.933\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.024107\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.059743\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.958\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.960194. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:33.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.096358\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.240\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.280730\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.242218. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000614\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.244\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.244\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002073\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.244\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.245544. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000766\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.247\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002275\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.249117. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000515\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001884\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.251\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.252423. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000494\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.253\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.254\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001767\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.254\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.255\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.255409. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.255\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.255\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000610\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001738\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.257\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.258242. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000440\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.259\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001533\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.261017. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.051062\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.169042\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.430\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.431334. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.433\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000719\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.433\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.433\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002271\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.433\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.435039. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000529\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001961\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.437\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.438258. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000549\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.439\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.440\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001836\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.440\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.441675. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000480\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001698\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.443\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.444502. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000432\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.445\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001475\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.446\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.447331. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000560\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002037\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.449\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.450\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.450709. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000461\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.452\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001706\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.453614. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000431\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001619\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.455\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.456\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.456354. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.456\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.456\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000500\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.457\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001761\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.476393. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000694\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002162\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.478\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.479821. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000443\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.481\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001582\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.482562. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000450\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001509\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.484\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.485213. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000747\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.487\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002087\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.488491. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.570\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.039900\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.611\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.123018\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.611\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.612903. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000530\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.614\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001848\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.616112. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000553\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001867\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.618\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.619\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.619236. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.619\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.619\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.619\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.620\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000449\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.620\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.620\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001561\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.620\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.621973. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000444\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001567\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.623\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.624635. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000472\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.625\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001622\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.627487. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000565\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001945\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.629\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.631092. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:34.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.132318\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.108\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.478014\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.110579. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.111\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.111\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000835\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002405\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.113\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.114273. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000872\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.116\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002747\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.118304. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000667\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.120\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001921\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.120\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.121639. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/none/static/present.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001168\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.125\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/none/static/present.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.004340\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/none/static/.present_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.127226. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000535\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001667\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.128\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.130180. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000638\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.131\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.132\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001985\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.132\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.133\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.133366. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.133\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.133\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000595\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001750\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.135\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.136312. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000587\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.137\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001763\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.139237. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.140\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.140\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000552\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.140\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.141\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001845\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.141\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.142\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.142795. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/none/static/present.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.145\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000699\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.145\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/none/static/present.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003298\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/none/static/.present_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.147754. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.739\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.145727\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.739\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.593151\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.740\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.742465. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.840\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.030237\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.840\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.099197\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.843082. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.045663\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.978\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.135366\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.978\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.979866. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000733\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.981\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002257\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.983223. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000639\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002292\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.985\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.986869. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000508\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.988\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001896\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.989928. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000468\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001743\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.991\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.992953. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000601\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.994\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001857\u001b[0m\n", + "\u001b[32m2024-12-14 19:31:35.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/.count_cache\u001b[0m\n" + ] + } + ], "source": [ - "!meds-tab-cache-task \"input_dir={MIMICIV_MEDS_DIR}/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" \"input_label_dir=$TASK_DIR/$TASK_NAME/\" \"task_name=$TASK_NAME\" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS" + "!meds-tab-cache-task \"input_dir={MEDS_DIR}/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" \"input_label_dir=$TASK_DIR/$TASK_NAME/\" \"task_name=$TASK_NAME\" do_overwrite=False \"$WINDOW_SIZES\" \"$AGGREGATIONS\"" ] }, { @@ -185,9 +1849,397 @@ "metadata": { "id": "dLIkOzTblBB2" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m[I 2024-12-14 19:36:45,225]\u001b[0m A new study created in memory with name: no-name-12520edb-1d03-4285-8296-9a50cda0c57f\u001b[0m\n", + "[2024-12-14 19:36:45,225][HYDRA] Study name: no-name-12520edb-1d03-4285-8296-9a50cda0c57f\n", + "[2024-12-14 19:36:45,225][HYDRA] Storage: None\n", + "[2024-12-14 19:36:45,225][HYDRA] Sampler: TPESampler\n", + "[2024-12-14 19:36:45,225][HYDRA] Directions: ['maximize']\n", + "[2024-12-14 19:36:45,228][HYDRA] 
Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 2 jobs\n", + "[2024-12-14 19:36:45,228][HYDRA] Launching jobs, sweep output dir : /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_19-36-44/hydra\n", + "[2024-12-14 19:36:45,228][HYDRA] \t#0 : +model_launcher.model.eta=0.3778393076431474 +model_launcher.model.lambda=0.02025881019876421 +model_launcher.model.alpha=0.0016680628084537149 +model_launcher.model.subsample=0.6402928897499296 +model_launcher.model.min_child_weight=72.18555499106307 +model_launcher.model.max_depth=13 model_launcher.training_params.num_boost_round=258 model_launcher.training_params.early_stopping_rounds=5 input_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ output_model_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/ task_name=mortality/in_icu/first_24h do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max] tabularization.min_code_inclusion_count=10\n", + "[2024-12-14 19:36:45,228][HYDRA] \t#1 : +model_launcher.model.eta=0.3174642123234027 +model_launcher.model.lambda=0.1822517129021365 +model_launcher.model.alpha=0.0034557035933266635 +model_launcher.model.subsample=0.6267376438851018 +model_launcher.model.min_child_weight=19.197298182330886 +model_launcher.model.max_depth=13 model_launcher.training_params.num_boost_round=709 model_launcher.training_params.early_stopping_rounds=3 input_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data 
output_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ output_model_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/ task_name=mortality/in_icu/first_24h do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max] tabularization.min_code_inclusion_count=10\n", + "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/joblib/externals/loky/backend/fork_exec.py:38: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "In addition, using fork() with Python in general is a recipe for mysterious\n", + "deadlocks and crashes.\n", + "\n", + "The most likely reason you are seeing this error is because you are using the\n", + "multiprocessing module on Linux, which uses fork() by default. This will be\n", + "fixed in Python 3.14. Until then, you want to use the \"spawn\" context instead.\n", + "\n", + "See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.\n", + "\n", + "If you really know what your doing, you can silence this warning with the warning module\n", + "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", + "\n", + " pid = os.fork()\n", + "\u001b[32m2024-12-14 19:36:46.927\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main function in __main__ module. 
Using sys.argv[0] as a fallback.\u001b[0m\n", + "\u001b[32m2024-12-14 19:36:46.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-model with the following configuration:\n", + "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: 
null\n", + "data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.3778393076431474\n", + " lambda: 0.02025881019876421\n", + " alpha: 0.0016680628084537149\n", + " subsample: 0.6402928897499296\n", + " min_child_weight: 72.18555499106307\n", + " max_depth: 13\n", + " training_params:\n", + " num_boost_round: 258\n", + " early_stopping_rounds: 5\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 19:36:46.930\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main function in __main__ module. 
Using sys.argv[0] as a fallback.\u001b[0m\n", + "\u001b[32m2024-12-14 19:36:46.933\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-model with the following configuration:\n", + "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: 
null\n", + "data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.3174642123234027\n", + " lambda: 0.1822517129021365\n", + " alpha: 0.0034557035933266635\n", + " subsample: 0.6267376438851018\n", + " min_child_weight: 19.197298182330886\n", + " max_depth: 13\n", + " training_params:\n", + " num_boost_round: 709\n", + " early_stopping_rounds: 3\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 19:36:46.933\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " 
min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: null\n", + "data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.3778393076431474\n", + " lambda: 0.02025881019876421\n", + " alpha: 0.0016680628084537149\n", + " subsample: 0.6402928897499296\n", + " min_child_weight: 72.18555499106307\n", + " max_depth: 13\n", + " training_params:\n", + " num_boost_round: 258\n", + " 
early_stopping_rounds: 5\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - input_label_cache_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels\n", + " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", + " - tabularization.filtered_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 19:36:46.937\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 
365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: null\n", + "data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.3174642123234027\n", + " lambda: 0.1822517129021365\n", + " alpha: 0.0034557035933266635\n", + " subsample: 0.6267376438851018\n", + " min_child_weight: 19.197298182330886\n", + " max_depth: 13\n", + " training_params:\n", + " num_boost_round: 709\n", + " early_stopping_rounds: 3\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - input_label_cache_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels\n", + " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", + " - tabularization.filtered_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "Error executing job with overrides: ['+model_launcher.model.eta=0.3778393076431474', '+model_launcher.model.lambda=0.02025881019876421', '+model_launcher.model.alpha=0.0016680628084537149', '+model_launcher.model.subsample=0.6402928897499296', '+model_launcher.model.min_child_weight=72.18555499106307', '+model_launcher.model.max_depth=13', 'model_launcher.training_params.num_boost_round=258', 'model_launcher.training_params.early_stopping_rounds=5', 'input_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data', 'output_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/', 'output_model_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/', 'task_name=mortality/in_icu/first_24h', 'do_overwrite=False', 'tabularization.window_sizes=[1d,30d,365d]', 'tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]', 'tabularization.min_code_inclusion_count=10']\n", + "Traceback (most recent call last):\n", + " File \"/Users/sim/miniconda3/envs/dev/bin/meds-tab-model\", line 8, in \n", + " sys.exit(main())\n", 
+ " ^^^^^^\n", + " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra/main.py\", line 94, in decorated_main\n", + " _run_hydra(\n", + " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra/_internal/utils.py\", line 394, in _run_hydra\n", + " _run_app(\n", + " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra/_internal/utils.py\", line 465, in _run_app\n", + " run_and_report(\n", + " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra/_internal/utils.py\", line 223, in run_and_report\n", + " raise ex\n", + " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra/_internal/utils.py\", line 220, in run_and_report\n", + " return func()\n", + " ^^^^^^\n", + " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra/_internal/utils.py\", line 466, in \n", + " lambda: hydra.multirun(\n", + " ^^^^^^^^^^^^^^^\n", + " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra/_internal/hydra.py\", line 162, in multirun\n", + " ret = sweeper.sweep(arguments=task_overrides)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra_plugins/hydra_optuna_sweeper/optuna_sweeper.py\", line 52, in sweep\n", + " return self.sweeper.sweep(arguments)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra_plugins/hydra_optuna_sweeper/_impl.py\", line 391, in sweep\n", + " raise e\n", + " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra_plugins/hydra_optuna_sweeper/_impl.py\", line 357, in sweep\n", + " values = [float(ret.return_value)]\n", + " ^^^^^^^^^^^^^^^^\n", + " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra/core/utils.py\", line 260, in return_value\n", + " raise self._return_value\n", + "hydra.errors.InstantiationException: Error locating target 
'MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize', set env var HYDRA_FULL_ERROR=1 to see chained exception.\n", + "full_key: model_launcher\n" + ] + } + ], "source": [ - "!meds-tab-xgboost --multirun \"input_dir=$MIMICIV_MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" \"output_model_dir=$OUTPUT_MODEL_DIR/$TASK_NAME/\" \"task_name=$TASK_NAME\" do_overwrite=False \"hydra.sweeper.n_trials=10\" $WINDOW_SIZES $AGGREGATIONS \"tabularization.min_code_inclusion_count=10\"" + "!meds-tab-model --multirun \"input_dir=$MEDS_DIR/data\" \"output_dir=$OUTPUT_TABULARIZATION_DIR\" \"output_model_dir=$OUTPUT_MODEL_DIR/$TASK_NAME/\" \"task_name=$TASK_NAME\" do_overwrite=False \"hydra.sweeper.n_trials=10\" \"$WINDOW_SIZES\" \"$AGGREGATIONS\" \"tabularization.min_code_inclusion_count=10\"" ] } ], @@ -196,11 +2248,21 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": "dev", + "language": "python", "name": "python3" }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" } }, "nbformat": 4, diff --git a/demo/meds_tab.py b/demo/meds_tab.py deleted file mode 100644 index 4fae3ae..0000000 --- a/demo/meds_tab.py +++ /dev/null @@ -1,240 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.16.4 -# kernelspec: -# display_name: Python 3 -# name: python3 -# --- - -# %% [Colab-only] Switch Colab to python 3.12 -# !sudo apt-get install python3.12 python3.12-venv -# import sys -# !python3.12 -m venv meds_env -# import os -# os.environ['PATH'] = '/content/meds_env/bin:' + os.environ['PATH'] -# !pip install --upgrade pip - -# # Then in a new code cell: -# import sys -# sys.executable = '/content/meds_env/bin/python' - -# # Confirm 
python version is 3.12 -# !python --version - -# %% -!pwd # Should be .../src/MEDS_DEV/demo - -# %% [markdown] -# ## Install dependencies - -# %% -!pip -q install meds_etl==0.3.6 meds_transforms==0.0.7 - -# TODO install meds-evaluation - -# %% [markdown] -# # Download MIMIC-IV demo - -# %% -# macOS users should install wget (e.g. through brew) -!wget -q -r -N -c --no-host-directories --cut-dirs=1 -np -P ./content/download https://physionet.org/files/mimic-iv-demo/2.2/ - -# %% -# Download pre-meds script, event config (defining how raw data is converted to meds data), and meds-transform config -!mkdir -p ./content/meds-transform/ -!git clone --depth 1 https://github.com/mmcdermott/MEDS_transforms.git ./content/tmp/ -!mv ./content/tmp/MIMIC-IV_Example ./content/MIMIC-IV_Example - -# %% -# Download MIMIC-IV metadata -MIMICIV_RAW_DIR = "https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map" -MIMICIV_PRE_MEDS_DIR = "./content/pre_meds/" -!mkdir {MIMICIV_PRE_MEDS_DIR} - -OUTPUT_DIR = "./content/download/mimic-iv-demo/2.2" - -files = [ - 'd_labitems_to_loinc.csv', - 'inputevents_to_rxnorm.csv', - 'lab_itemid_to_loinc.csv', - 'meas_chartevents_main.csv', - 'meas_chartevents_value.csv', - 'numerics-summary.csv', - 'outputevents_to_loinc.csv', - 'proc_datetimeevents.csv', - 'proc_itemid.csv', - 'waveforms-summary.csv' -] - -for file in files: - !wget -O {OUTPUT_DIR}/{file} {MIMICIV_RAW_DIR}/{file} - !wget -O {MIMICIV_PRE_MEDS_DIR}/{file} {MIMICIV_RAW_DIR}/{file} - -# %% -# Convert to MEDS -CURRENT_DIR = !pwd -CURRENT_DIR = CURRENT_DIR[0] -# %% -TUTORIAL_DIR = CURRENT_DIR + "/content/MIMIC-IV_Example" -MIMICIV_RAW_DIR = CURRENT_DIR + "/content/download/mimic-iv-demo/2.2" -MIMICIV_PRE_MEDS_DIR = CURRENT_DIR + "/content/pre_meds" -MIMICIV_MEDS_DIR = CURRENT_DIR + "/content/meds" - -EVENT_CONVERSION_CONFIG_FP = CURRENT_DIR + "/content/MIMIC-IV_Example/configs/event_config.yaml" -PIPELINE_CONFIG_PATH = CURRENT_DIR + 
"/content/MIMIC-IV_Example/configs/pipeline_config.yaml" -!cd {TUTORIAL_DIR} && ./run.sh {MIMICIV_RAW_DIR} {MIMICIV_PRE_MEDS_DIR} {MIMICIV_MEDS_DIR} do_unzip=true - -# %% [markdown] -# # Examine MEDS data - -# %% -import polars as pl - -data = pl.read_parquet('./content/meds/data/**/*.parquet') - -data[['subject_id', 'time', 'code', 'numeric_value']] - -# %% [markdown] -# # A simple Polars analysis - -# %% -icd10_events = data.filter(pl.col('code').str.starts_with('DIAGNOSIS//ICD//10//')) -icd10_events.group_by('code').count().sort('count', descending=True) - -# %% -df = pl.read_parquet("./content/meds/metadata/codes.parquet") -df - -# %% [markdown] -# ## Using an example MEDS tool, ACES for labeling - -# %% [markdown] -# ## Install ACES - -# %% -!pip install es-aces - -# %% - -# From ACES documentation -task_config = """ -description: >- - This file specifies the base configuration for the prediction of a hospital los being greater than 3days, - leveraging only the first 48 hours of data after admission, with a 24 hour gap between the input window - and the target window. Patients who die or are discharged in the gap window are excluded. Note that this - task is in-**hospital** los, not in-**ICU** los which is a different task. 
- -predicates: - hospital_admission: - code: {regex: "HOSPITAL_ADMISSION//.*"} - hospital_discharge: - code: {regex: "HOSPITAL_DISCHARGE//.*"} - death: - code: MEDS_DEATH - discharge_or_death: - expr: or(hospital_discharge, death) - -trigger: hospital_admission - -windows: - input: - start: NULL - end: trigger + 48h - start_inclusive: True - end_inclusive: True - index_timestamp: end - gap: - start: input.end - end: start + 24h - start_inclusive: False - end_inclusive: True - has: - hospital_admission: (None, 0) - discharge_or_death: (None, 0) - target: - start: trigger - end: start + 3d - start_inclusive: False - end_inclusive: True - label: discharge_or_death -""" - -!mkdir ./content/tasks/ -p -TASK_NAME = "in_hospital_3d_los_after_48h" -TASK_CONFIG_FP = f"./content/tasks/{TASK_NAME}.yaml" -with open(TASK_CONFIG_FP, 'w') as f: - f.write(task_config) - -# %% -!pip install es-aces - -# %% -!aces-cli --multirun data=sharded data.standard=meds data.root="$MIMICIV_MEDS_DIR/data" "data.shard=$(expand_shards ./content/meds/data/)" cohort_dir=" ./content/tasks" cohort_name="$TASK_NAME" config_path="$TASK_CONFIG_FP" - -# %% -# TODO: reimporting polars due to dependencies? -import polars as pl - -# Execute query and get results -df = pl.read_parquet(f"./content/tasks/{TASK_NAME}/**/*.parquet") - -print("train prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/train/*.parquet")['boolean_value'].mean(), 3))) -print("tuning prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/tuning/*.parquet")['boolean_value'].mean(), 3))) -print("held_out prevalence: " + str(round(pl.read_parquet(f"./content/tasks/{TASK_NAME}/held_out/*.parquet")['boolean_value'].mean(), 3))) - - -df.sort('boolean_value') - -# %% -# ## Install meds-tab - -!pip uninstall es-aces -y # TODO ??? 
-!pip install meds-tab - -# %% -MIMICIV_MEDS_DIR = "./content/meds/" -OUTPUT_TABULARIZATION_DIR="./content/tabularized/" -TASK_DIR="./content/tasks/" -TASK_NAME="in_hospital_3d_los_after_48h" -OUTPUT_MODEL_DIR="./content/output/meds_tab/" - -# %% -!meds-tab-describe input_dir={MIMICIV_MEDS_DIR}/data output_dir={OUTPUT_TABULARIZATION_DIR} - -# %% -# Define the window sizes and aggregations to generate features for -# TODO define this as system variables or make sure the shell -# commands can find these -WINDOW_SIZES = "tabularization.window_sizes=[1d,30d,365d]" -AGGREGATIONS = "tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]" - -# %% -!rm -rf ./content/tabularized/tabularize/ - -# %% -# TODO shell vs python variables -!echo {OUTPUT_TABULARIZATION_DIR} - -# %% -# TODO shell vs python variables -!echo WINDOW_SIZES -# %% -# TODO shell vs python variables -!meds-tab-tabularize-static "input_dir=$MIMICIV_MEDS_DIR/data" "output_dir=$OUTPUT_TABULARIZATION_DIR" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS - -# %% -# TODO shell vs python variables -!meds-tab-tabularize-time-series --multirun "worker=range(0,2)" "hydra/launcher=joblib" "input_dir=$MIMICIV_MEDS_DIR/data" "output_dir=$OUTPUT_TABULARIZATION_DIR" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS - -# %% -# TODO shell vs python variables -!meds-tab-cache-task "input_dir={MIMICIV_MEDS_DIR}/data" "output_dir=$OUTPUT_TABULARIZATION_DIR" "input_label_dir=$TASK_DIR/$TASK_NAME/" "task_name=$TASK_NAME" do_overwrite=False $WINDOW_SIZES $AGGREGATIONS - -# %% -# TODO shell vs python variables -!meds-tab-xgboost --multirun "input_dir=$MIMICIV_MEDS_DIR/data" "output_dir=$OUTPUT_TABULARIZATION_DIR" "output_model_dir=$OUTPUT_MODEL_DIR/$TASK_NAME/" "task_name=$TASK_NAME" do_overwrite=False "hydra.sweeper.n_trials=10" $WINDOW_SIZES $AGGREGATIONS "tabularization.min_code_inclusion_count=10" From 685a3e67b684b70bc8017d7ef3fe1d5ac6426807 Mon Sep 17 00:00:00 2001 From: Nassim 
Oufattole Date: Sat, 14 Dec 2024 19:54:30 -0800 Subject: [PATCH 5/8] Updates file paths for meds_cehrbert (still needs to be tested) --- demo/meds_cehrbert.ipynb | 172 +++++++++++++++++++++++++++++++++++---- 1 file changed, 158 insertions(+), 14 deletions(-) diff --git a/demo/meds_cehrbert.ipynb b/demo/meds_cehrbert.ipynb index a378210..ea1e302 100644 --- a/demo/meds_cehrbert.ipynb +++ b/demo/meds_cehrbert.ipynb @@ -12,16 +12,138 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "e4c2dca6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting meds_reader==0.1.9\n", + " Downloading meds_reader-0.1.9-cp312-cp312-macosx_12_0_x86_64.whl.metadata (3.0 kB)\n", + "Requirement already satisfied: pyarrow>=9 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds_reader==0.1.9) (17.0.0)\n", + "Requirement already satisfied: meds==0.3.3 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds_reader==0.1.9) (0.3.3)\n", + "Collecting numpy<2,>=1.16 (from meds_reader==0.1.9)\n", + " Using cached numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl.metadata (61 kB)\n", + "Requirement already satisfied: pandas>=2.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds_reader==0.1.9) (2.2.3)\n", + "Requirement already satisfied: jsonschema>=4.0.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds==0.3.3->meds_reader==0.1.9) (4.23.0)\n", + "Requirement already satisfied: typing-extensions>=4.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds==0.3.3->meds_reader==0.1.9) (4.12.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from pandas>=2.2->meds_reader==0.1.9) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from 
pandas>=2.2->meds_reader==0.1.9) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from pandas>=2.2->meds_reader==0.1.9) (2024.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->meds_reader==0.1.9) (24.2.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->meds_reader==0.1.9) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->meds_reader==0.1.9) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->meds_reader==0.1.9) (0.22.3)\n", + "Requirement already satisfied: six>=1.5 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas>=2.2->meds_reader==0.1.9) (1.17.0)\n", + "Downloading meds_reader-0.1.9-cp312-cp312-macosx_12_0_x86_64.whl (3.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.3/3.3 MB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hUsing cached numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl (20.3 MB)\n", + "Installing collected packages: numpy, meds_reader\n", + " Attempting uninstall: numpy\n", + " Found existing installation: numpy 2.2.0\n", + " Uninstalling numpy-2.2.0:\n", + " Successfully uninstalled numpy-2.2.0\n", + "Successfully installed meds_reader-0.1.9 numpy-1.26.4\n", + "Requirement already satisfied: setuptools in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (75.1.0)\n", + "Collecting cehrbert==1.3.1\n", + " Downloading 
cehrbert-1.3.1-py3-none-any.whl.metadata (9.9 kB)\n", + "Collecting dask==2024.1.1 (from cehrbert==1.3.1)\n", + " Downloading dask-2024.1.1-py3-none-any.whl.metadata (3.7 kB)\n", + "Collecting datasets==2.16.1 (from cehrbert==1.3.1)\n", + " Downloading datasets-2.16.1-py3-none-any.whl.metadata (20 kB)\n", + "Collecting evaluate==0.4.1 (from cehrbert==1.3.1)\n", + " Downloading evaluate-0.4.1-py3-none-any.whl.metadata (9.4 kB)\n", + "Collecting fast-ml==3.68 (from cehrbert==1.3.1)\n", + " Downloading fast_ml-3.68-py3-none-any.whl.metadata (12 kB)\n", + "Collecting femr==0.2.0 (from cehrbert==1.3.1)\n", + " Downloading femr-0.2.0-py3-none-any.whl.metadata (11 kB)\n", + "Collecting Jinja2==3.1.3 (from cehrbert==1.3.1)\n", + " Downloading Jinja2-3.1.3-py3-none-any.whl.metadata (3.3 kB)\n", + "Requirement already satisfied: meds==0.3.3 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cehrbert==1.3.1) (0.3.3)\n", + "Requirement already satisfied: meds_reader==0.1.9 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cehrbert==1.3.1) (0.1.9)\n", + "Collecting networkx==3.2.1 (from cehrbert==1.3.1)\n", + " Downloading networkx-3.2.1-py3-none-any.whl.metadata (5.2 kB)\n", + "Collecting numpy==1.24.3 (from cehrbert==1.3.1)\n", + " Downloading numpy-1.24.3.tar.gz (10.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.9/10.9 MB\u001b[0m \u001b[31m30.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25ldone\n", + "\u001b[?25h Getting requirements to build wheel ... 
\u001b[?25lerror\n", + " \u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n", + " \n", + " \u001b[31m×\u001b[0m \u001b[32mGetting requirements to build wheel\u001b[0m did not run successfully.\n", + " \u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n", + " \u001b[31m╰─>\u001b[0m \u001b[31m[33 lines of output]\u001b[0m\n", + " \u001b[31m \u001b[0m Traceback (most recent call last):\n", + " \u001b[31m \u001b[0m File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py\", line 353, in \n", + " \u001b[31m \u001b[0m main()\n", + " \u001b[31m \u001b[0m File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py\", line 335, in main\n", + " \u001b[31m \u001b[0m json_out['return_val'] = hook(**hook_input['kwargs'])\n", + " \u001b[31m \u001b[0m ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " \u001b[31m \u001b[0m File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py\", line 112, in get_requires_for_build_wheel\n", + " \u001b[31m \u001b[0m backend = _build_backend()\n", + " \u001b[31m \u001b[0m ^^^^^^^^^^^^^^^^\n", + " \u001b[31m \u001b[0m File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py\", line 77, in _build_backend\n", + " \u001b[31m \u001b[0m obj = import_module(mod_path)\n", + " \u001b[31m \u001b[0m ^^^^^^^^^^^^^^^^^^^^^^^\n", + " \u001b[31m \u001b[0m File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/importlib/__init__.py\", line 90, in import_module\n", + " \u001b[31m \u001b[0m return _bootstrap._gcd_import(name[level:], package, level)\n", + " \u001b[31m \u001b[0m ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " \u001b[31m \u001b[0m File \"\", line 1387, in _gcd_import\n", + " \u001b[31m \u001b[0m File \"\", line 1360, in _find_and_load\n", + " \u001b[31m \u001b[0m File 
\"\", line 1310, in _find_and_load_unlocked\n", + " \u001b[31m \u001b[0m File \"\", line 488, in _call_with_frames_removed\n", + " \u001b[31m \u001b[0m File \"\", line 1387, in _gcd_import\n", + " \u001b[31m \u001b[0m File \"\", line 1360, in _find_and_load\n", + " \u001b[31m \u001b[0m File \"\", line 1331, in _find_and_load_unlocked\n", + " \u001b[31m \u001b[0m File \"\", line 935, in _load_unlocked\n", + " \u001b[31m \u001b[0m File \"\", line 999, in exec_module\n", + " \u001b[31m \u001b[0m File \"\", line 488, in _call_with_frames_removed\n", + " \u001b[31m \u001b[0m File \"/private/var/folders/fm/gwbmjrfx6gxg6x7xxcxbnrth0000gn/T/pip-build-env-4n11flhi/overlay/lib/python3.12/site-packages/setuptools/__init__.py\", line 16, in \n", + " \u001b[31m \u001b[0m import setuptools.version\n", + " \u001b[31m \u001b[0m File \"/private/var/folders/fm/gwbmjrfx6gxg6x7xxcxbnrth0000gn/T/pip-build-env-4n11flhi/overlay/lib/python3.12/site-packages/setuptools/version.py\", line 1, in \n", + " \u001b[31m \u001b[0m import pkg_resources\n", + " \u001b[31m \u001b[0m File \"/private/var/folders/fm/gwbmjrfx6gxg6x7xxcxbnrth0000gn/T/pip-build-env-4n11flhi/overlay/lib/python3.12/site-packages/pkg_resources/__init__.py\", line 2172, in \n", + " \u001b[31m \u001b[0m register_finder(pkgutil.ImpImporter, find_on_path)\n", + " \u001b[31m \u001b[0m ^^^^^^^^^^^^^^^^^^^\n", + " \u001b[31m \u001b[0m AttributeError: module 'pkgutil' has no attribute 'ImpImporter'. 
Did you mean: 'zipimporter'?\n", + " \u001b[31m \u001b[0m \u001b[31m[end of output]\u001b[0m\n", + " \n", + " \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n", + "\u001b[?25h\u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n", + "\n", + "\u001b[31m×\u001b[0m \u001b[32mGetting requirements to build wheel\u001b[0m did not run successfully.\n", + "\u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n", + "\u001b[31m╰─>\u001b[0m See above for output.\n", + "\n", + "\u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n" + ] + } + ], "source": [ "!pip install meds_reader==0.1.9\n", "!pip install setuptools\n", "!pip install cehrbert==1.3.1" ] }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ff2f8638", + "metadata": {}, + "outputs": [], + "source": [ + "#@title Download E-ICU demo\n", + "import os\n", + "from pathlib import Path\n", + "notebook_dir = os.getcwd()\n", + "\n", + "ROOT_DIR=f\"{notebook_dir}/work_dir/mimiciv_demo/\"\n", + "# ROOT_DIR=f\"{notebook_dir}/work_dir/eicu_demo/\"\n", + "Path(ROOT_DIR).mkdir(parents=True, exist_ok=True)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -29,13 +151,14 @@ "metadata": {}, "outputs": [], "source": [ - "MIMICIV_MEDS_DIR = \"./content/meds/\"\n", - "MIMICIV_MEDS_READER_DIR = \"./content/meds_reader/\"\n", - "TASK_DIR=\"./content/tasks/\"\n", - "TASK_NAME=\"in_hospital_3d_los_after_48h\"\n", - "OUTPUT_PRETRAIN_MODEL_DIR=\"./content/output/cehrbert/\"\n", + "MEDS_DIR = ROOT_DIR + \"/meds/\"\n", + "MEDS_READER_DIR = ROOT_DIR + \"/meds_reader/\"\n", + "TASK_DIR = MEDS_DIR + \"/task_labels\"\n", + "TASK_NAME=\"mortality/in_icu/first_24h\"\n", + "# TASK_NAME=\"los_in_hospital_first_48h\"\n", + "OUTPUT_PRETRAIN_MODEL_DIR= ROOT_DIR + \"/output/cehrbert/\"\n", "# TODO this variable has an identical name?\n", - 
"OUTPUT_PRETRAIN_MODEL_DIR=\"./content/output/cehrbert_finetuned/\"" + "OUTPUT_PRETRAIN_MODEL_DIR= ROOT_DIR + \"/output/cehrbert_finetuned/\"" ] }, { @@ -53,7 +176,7 @@ "metadata": {}, "outputs": [], "source": [ - "!meds_reader_convert $MIMICIV_MEDS_DIR $MIMICIV_MEDS_READER_DIR" + "!meds_reader_convert $MEDS_DIR $MEDS_READER_DIR" ] }, { @@ -152,7 +275,7 @@ "\n", "report_to: \"none\"\n", "\"\"\"\n", - "PRETRAIN_CONFIG_FP = f\"./content/output/cehrbert/cehrbert_pretrain_config.yaml\"\n", + "PRETRAIN_CONFIG_FP = ROOT_DIR + \"/output/cehrbert/cehrbert_pretrain_config.yaml\"\n", "with open(PRETRAIN_CONFIG_FP, 'w') as f:\n", " f.write(cehrbert_pretrain_config)" ] @@ -252,10 +375,18 @@ "execution_count": null, "id": "bf0b1d0f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3.11 -m cehrbert.runners.hf_cehrbert_finetune_runner /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/cehrbert/cehrbert_finetune_config.yaml\n" + ] + } + ], "source": [ "# ## Finetune cehrbert for the downstream task\n", - "!python3.11 -m cehrbert.runners.hf_cehrbert_finetune_runner ./content/output/cehrbert/cehrbert_finetune_config.yaml" + "!python3.11 -m cehrbert.runners.hf_cehrbert_finetune_runner {ROOT_DIR}/output/cehrbert/cehrbert_finetune_config.yaml" ] }, { @@ -277,7 +408,7 @@ "metadata": {}, "outputs": [], "source": [ - "!cat ./content/output/cehrbert_finetuned/test_results.json" + "!cat {ROOT_DIR}/output/cehrbert_finetuned/test_results.json" ] } ], @@ -286,8 +417,21 @@ "main_language": "python" }, "kernelspec": { - "display_name": "Python 3", + "display_name": "dev", + "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" } }, "nbformat": 4, From 
9d144d7910ec2dcc28b6fe544ada9d0725880fc2 Mon Sep 17 00:00:00 2001 From: Nassim Date: Sat, 14 Dec 2024 23:42:35 -0500 Subject: [PATCH 6/8] added pip install -e . to the aces notebook, and reran from scratch --- demo/aces.ipynb | 543 +-- demo/extract_meds_data.ipynb | 7072 +++++++++++++++++----------------- demo/meds_tab.ipynb | 4379 ++++++++++++++------- 3 files changed, 6710 insertions(+), 5284 deletions(-) diff --git a/demo/aces.ipynb b/demo/aces.ipynb index 03bacc3..5173b83 100644 --- a/demo/aces.ipynb +++ b/demo/aces.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 19, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -25,28 +25,66 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: es-aces in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (0.6.1)\n", - "Requirement already satisfied: polars<=1.17.1,>=1.0.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (1.17.1)\n", - "Requirement already satisfied: bigtree==0.18.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (0.18.3)\n", - "Requirement already satisfied: ruamel.yaml==0.18.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (0.18.6)\n", - "Requirement already satisfied: loguru==0.7.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (0.7.3)\n", - "Requirement already satisfied: hydra-core==1.3.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (1.3.2)\n", - "Requirement already satisfied: pytimeparse==1.1.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (1.1.8)\n", - "Requirement already satisfied: networkx==3.3.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (3.3)\n", - "Requirement already satisfied: pyarrow==17.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) 
(17.0.0)\n", - "Requirement already satisfied: meds==0.3.3 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from es-aces) (0.3.3)\n", - "Requirement already satisfied: omegaconf<2.4,>=2.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-core==1.3.*->es-aces) (2.3.0)\n", - "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-core==1.3.*->es-aces) (4.9.3)\n", - "Requirement already satisfied: packaging in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-core==1.3.*->es-aces) (24.2)\n", - "Requirement already satisfied: jsonschema>=4.0.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds==0.3.3->es-aces) (4.23.0)\n", - "Requirement already satisfied: typing-extensions>=4.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds==0.3.3->es-aces) (4.12.2)\n", - "Requirement already satisfied: numpy>=1.16.6 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from pyarrow==17.*->es-aces) (2.2.0)\n", - "Requirement already satisfied: ruamel.yaml.clib>=0.2.7 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from ruamel.yaml==0.18.*->es-aces) (0.2.12)\n", - "Requirement already satisfied: attrs>=22.2.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->es-aces) (24.2.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->es-aces) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->es-aces) (0.35.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->es-aces) (0.22.3)\n", - 
"Requirement already satisfied: PyYAML>=5.1.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from omegaconf<2.4,>=2.2->hydra-core==1.3.*->es-aces) (6.0.2)\n" + "Requirement already satisfied: es-aces in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (0.6.1)\n", + "Requirement already satisfied: polars<=1.17.1,>=1.0.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces) (1.17.1)\n", + "Requirement already satisfied: bigtree==0.18.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces) (0.18.3)\n", + "Requirement already satisfied: ruamel.yaml==0.18.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces) (0.18.6)\n", + "Requirement already satisfied: loguru==0.7.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces) (0.7.3)\n", + "Requirement already satisfied: hydra-core==1.3.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces) (1.3.2)\n", + "Requirement already satisfied: pytimeparse==1.1.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces) (1.1.8)\n", + "Requirement already satisfied: networkx==3.3.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces) (3.3)\n", + "Requirement already satisfied: pyarrow==17.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces) (17.0.0)\n", + "Requirement already satisfied: meds==0.3.3 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces) (0.3.3)\n", + "Requirement already satisfied: omegaconf<2.4,>=2.2 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-core==1.3.*->es-aces) (2.3.0)\n", + "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from 
hydra-core==1.3.*->es-aces) (4.9.3)\n", + "Requirement already satisfied: packaging in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-core==1.3.*->es-aces) (24.2)\n", + "Requirement already satisfied: jsonschema>=4.0.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds==0.3.3->es-aces) (4.23.0)\n", + "Requirement already satisfied: typing-extensions>=4.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds==0.3.3->es-aces) (4.12.2)\n", + "Requirement already satisfied: numpy>=1.16.6 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from pyarrow==17.*->es-aces) (2.2.0)\n", + "Requirement already satisfied: ruamel.yaml.clib>=0.2.7 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from ruamel.yaml==0.18.*->es-aces) (0.2.12)\n", + "Requirement already satisfied: attrs>=22.2.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->es-aces) (24.2.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->es-aces) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->es-aces) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->es-aces) (0.22.3)\n", + "Requirement already satisfied: PyYAML>=5.1.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from omegaconf<2.4,>=2.2->hydra-core==1.3.*->es-aces) (6.0.2)\n", + "Obtaining file:///storage/nassim/projects/MEDS-DEV\n", + " Installing build dependencies ... 
\u001b[?25ldone\n", + "\u001b[?25h Checking if build backend supports build_editable ... \u001b[?25ldone\n", + "\u001b[?25h Getting requirements to build editable ... \u001b[?25ldone\n", + "\u001b[?25h Preparing editable metadata (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25hRequirement already satisfied: meds>=0.3.3 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from MEDS_DEV==0.1.dev159+g685a3e6) (0.3.3)\n", + "Requirement already satisfied: es-aces>=0.5.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from MEDS_DEV==0.1.dev159+g685a3e6) (0.6.1)\n", + "Requirement already satisfied: polars<=1.17.1,>=1.0.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces>=0.5.0->MEDS_DEV==0.1.dev159+g685a3e6) (1.17.1)\n", + "Requirement already satisfied: bigtree==0.18.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces>=0.5.0->MEDS_DEV==0.1.dev159+g685a3e6) (0.18.3)\n", + "Requirement already satisfied: ruamel.yaml==0.18.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces>=0.5.0->MEDS_DEV==0.1.dev159+g685a3e6) (0.18.6)\n", + "Requirement already satisfied: loguru==0.7.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces>=0.5.0->MEDS_DEV==0.1.dev159+g685a3e6) (0.7.3)\n", + "Requirement already satisfied: hydra-core==1.3.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces>=0.5.0->MEDS_DEV==0.1.dev159+g685a3e6) (1.3.2)\n", + "Requirement already satisfied: pytimeparse==1.1.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces>=0.5.0->MEDS_DEV==0.1.dev159+g685a3e6) (1.1.8)\n", + "Requirement already satisfied: networkx==3.3.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces>=0.5.0->MEDS_DEV==0.1.dev159+g685a3e6) (3.3)\n", + "Requirement already satisfied: 
pyarrow==17.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from es-aces>=0.5.0->MEDS_DEV==0.1.dev159+g685a3e6) (17.0.0)\n", + "Requirement already satisfied: jsonschema>=4.0.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds>=0.3.3->MEDS_DEV==0.1.dev159+g685a3e6) (4.23.0)\n", + "Requirement already satisfied: typing-extensions>=4.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds>=0.3.3->MEDS_DEV==0.1.dev159+g685a3e6) (4.12.2)\n", + "Requirement already satisfied: omegaconf<2.4,>=2.2 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-core==1.3.*->es-aces>=0.5.0->MEDS_DEV==0.1.dev159+g685a3e6) (2.3.0)\n", + "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-core==1.3.*->es-aces>=0.5.0->MEDS_DEV==0.1.dev159+g685a3e6) (4.9.3)\n", + "Requirement already satisfied: packaging in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-core==1.3.*->es-aces>=0.5.0->MEDS_DEV==0.1.dev159+g685a3e6) (24.2)\n", + "Requirement already satisfied: numpy>=1.16.6 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from pyarrow==17.*->es-aces>=0.5.0->MEDS_DEV==0.1.dev159+g685a3e6) (2.2.0)\n", + "Requirement already satisfied: ruamel.yaml.clib>=0.2.7 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from ruamel.yaml==0.18.*->es-aces>=0.5.0->MEDS_DEV==0.1.dev159+g685a3e6) (0.2.12)\n", + "Requirement already satisfied: attrs>=22.2.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds>=0.3.3->MEDS_DEV==0.1.dev159+g685a3e6) (24.2.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from 
jsonschema>=4.0.0->meds>=0.3.3->MEDS_DEV==0.1.dev159+g685a3e6) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds>=0.3.3->MEDS_DEV==0.1.dev159+g685a3e6) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds>=0.3.3->MEDS_DEV==0.1.dev159+g685a3e6) (0.22.3)\n", + "Requirement already satisfied: PyYAML>=5.1.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from omegaconf<2.4,>=2.2->hydra-core==1.3.*->es-aces>=0.5.0->MEDS_DEV==0.1.dev159+g685a3e6) (6.0.2)\n", + "Building wheels for collected packages: MEDS_DEV\n", + " Building editable for MEDS_DEV (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25h Created wheel for MEDS_DEV: filename=MEDS_DEV-0.1.dev159+g685a3e6-0.editable-py3-none-any.whl size=5217 sha256=02557ddafb9ef3654b379ce955ebc9b25abdf44395ec08f292260b0b8c05c422\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-2m4br255/wheels/f3/cd/e7/d6c66344946b3cec8631003801f957ed99735fb1b583af99ba\n", + "Successfully built MEDS_DEV\n", + "Installing collected packages: MEDS_DEV\n", + " Attempting uninstall: MEDS_DEV\n", + " Found existing installation: MEDS_DEV 0.1.dev159+g685a3e6\n", + " Uninstalling MEDS_DEV-0.1.dev159+g685a3e6:\n", + " Successfully uninstalled MEDS_DEV-0.1.dev159+g685a3e6\n", + "Successfully installed MEDS_DEV-0.1.dev159+g685a3e6\n" ] } ], @@ -54,19 +92,20 @@ "#@title Install ACES\n", "\n", "\n", - "!pip install es-aces" + "!pip install es-aces\n", + "!cd .. && pip install -e . 
# pip installs meds dev" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/\n" + "/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/\n" ] } ], @@ -86,19 +125,11 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 21, "metadata": { "id": "H6fqe217XDhi" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "TASK_DIR\n" - ] - } - ], + "outputs": [], "source": [ "# From the ACES documentation\n", "\n", @@ -154,7 +185,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -167,51 +198,51 @@ "name": "stdout", "output_type": "stream", "text": [ - "[2024-12-14 17:33:45,826][HYDRA] Launching 3 jobs locally\n", - "[2024-12-14 17:33:45,826][HYDRA] \t#0 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=held_out/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", - "\u001b[32m2024-12-14 17:33:46.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.045\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "[2024-12-14 23:34:59,188][HYDRA] Launching 3 jobs locally\n", + "[2024-12-14 23:34:59,188][HYDRA] \t#0 : data=sharded data.standard=meds data.root=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=held_out/0 cohort_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", + "\u001b[32m2024-12-14 23:34:59.390\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.396\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", - "root: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n", + "root: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n", "shard: held_out/0\n", "path: ${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.074\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - 
"\u001b[32m2024-12-14 17:33:46.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 23:34:59.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.430\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.450\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┣━━ input.start\n", "┃ ┗━━ gap.end\n", "┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.088\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 12,159 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.155\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 8 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 39 valid rows returned corresponding to 23 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.176\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. 
For now, we are dropping them.\n", + "\u001b[32m2024-12-14 23:34:59.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 12,159 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.515\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.534\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 
0'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.534\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 8 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.544\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 39 valid rows returned corresponding to 23 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.569\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.569\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.571\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -219,51 +250,51 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.279571. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/held_out/0.parquet'.\u001b[0m\n", - "[2024-12-14 17:33:46,323][HYDRA] \t#1 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=train/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", - "\u001b[32m2024-12-14 17:33:46.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "\u001b[32m2024-12-14 23:34:59.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.244924. 
Results saved to '/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/held_out/0.parquet'.\u001b[0m\n", + "[2024-12-14 23:34:59,636][HYDRA] \t#1 : data=sharded data.standard=meds data.root=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=train/0 cohort_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", + "\u001b[32m2024-12-14 23:34:59.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.726\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.727\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.727\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.728\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", - "root: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n", + "root: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n", "shard: train/0\n", "path: 
${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.422\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.450\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. 
Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 23:34:59.728\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.752\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.777\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.781\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.781\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.805\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.809\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┣━━ input.start\n", "┃ ┗━━ gap.end\n", "┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.452\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 47,886 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.528\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.556\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.556\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 18 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - 
"\u001b[32m2024-12-14 17:33:46.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 145 valid rows returned corresponding to 46 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.595\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.597\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. 
For now, we are dropping them.\n", + "\u001b[32m2024-12-14 23:34:59.809\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.809\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.809\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 47,886 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 
0'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:34:59.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 18 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.005\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 145 valid rows returned corresponding to 46 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.069\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -271,51 +302,51 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.208261. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/train/0.parquet'.\u001b[0m\n", - "[2024-12-14 17:33:46,600][HYDRA] \t#2 : data=sharded data.standard=meds data.root=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=tuning/0 cohort_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", - "\u001b[32m2024-12-14 17:33:46.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "\u001b[32m2024-12-14 23:35:00.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.351311. 
Results saved to '/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/train/0.parquet'.\u001b[0m\n", + "[2024-12-14 23:35:00,076][HYDRA] \t#2 : data=sharded data.standard=meds data.root=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data data.shard=tuning/0 cohort_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels cohort_name=los_in_hospital_first_48h config_path=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml\n", + "\u001b[32m2024-12-14 23:35:00.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", - "root: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n", + "root: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/data\n", "shard: tuning/0\n", "path: 
${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.680\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.686\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. 
Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 23:35:00.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'hospital_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.211\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.211\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┣━━ input.start\n", "┃ ┗━━ gap.end\n", "┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.694\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 22,999 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.760\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.760\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 6 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 
17:33:46.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 59 valid rows returned corresponding to 25 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.793\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. 
For now, we are dropping them.\n", + "\u001b[32m2024-12-14 23:35:00.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 22,999 rows as they failed to satisfy '1 <= hospital_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= hospital_admission <= 
0'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 6 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.390\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 59 valid rows returned corresponding to 25 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.390\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'discharge_or_death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.390\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:00.392\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -323,7 +354,7 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:46.796\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.125503. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/tuning/0.parquet'.\u001b[0m\n" + "\u001b[32m2024-12-14 23:35:00.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.227448. Results saved to '/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/los_in_hospital_first_48h/tuning/0.parquet'.\u001b[0m\n" ] } ], @@ -333,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -395,7 +426,7 @@ "└────────────┴───────────────────┴───────────────┴───────────────┴─────────────┴───────────────────┘" ] }, - "execution_count": 5, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -423,42 +454,22 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 24, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/sim/miniconda3/envs/dev/lib/python3.12/pty.py:95: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", - "In addition, using fork() with Python in general is a recipe for mysterious\n", - "deadlocks and crashes.\n", - "\n", - "The most likely reason you are seeing this error is because you are using the\n", - "multiprocessing module on Linux, which uses fork() by default. This will be\n", - "fixed in Python 3.14. 
Until then, you want to use the \"spawn\" context instead.\n", - "\n", - "See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.\n", - "\n", - "If you really know what your doing, you can silence this warning with the warning module\n", - "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", - "\n", - " pid, fd = os.forkpty()\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "Running task mortality/in_icu/first_24h on dataset MIMIC-IV with MEDS_ROOT_DIR=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds and SHARDS=held_out/0,train/0,tuning/0\n", - "[2024-12-14 17:33:48,126][HYDRA] Launching 3 jobs locally\n", - "[2024-12-14 17:33:48,126][HYDRA] \t#0 : data.shard=held_out/0\n", - "\u001b[32m2024-12-14 17:33:48.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.306\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "Running task mortality/in_icu/first_24h on dataset MIMIC-IV with MEDS_ROOT_DIR=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds and SHARDS=held_out/0,train/0,tuning/0\n", + "[2024-12-14 23:35:02,269][HYDRA] Launching 3 jobs locally\n", + "[2024-12-14 23:35:02,269][HYDRA] \t#0 : data.shard=held_out/0\n", + "\u001b[32m2024-12-14 23:35:02.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/storage/nassim/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/storage/nassim/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.494\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.494\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.494\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", "root: ${oc.env:MEDS_ROOT_DIR}/data\n", 
@@ -466,37 +477,37 @@ "path: ${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.344\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. 
Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 23:35:02.497\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.529\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.535\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.535\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.552\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.552\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.552\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.552\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┗━━ input.start\n", "┗━━ gap.end\n", " ┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.348\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 12,173 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.351\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 17 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 
17:33:48.406\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 16 valid rows returned corresponding to 11 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.433\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. 
For now, we are dropping them.\n", + "\u001b[32m2024-12-14 23:35:02.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.556\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 12,173 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.556\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.577\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.621\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", + 
"\u001b[32m2024-12-14 23:35:02.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 17 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 16 valid rows returned corresponding to 11 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.690\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -504,14 +515,14 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.231224. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/held_out/0.parquet'.\u001b[0m\n", - "[2024-12-14 17:33:48,517][HYDRA] \t#1 : data.shard=train/0\n", - "\u001b[32m2024-12-14 17:33:48.590\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "\u001b[32m2024-12-14 23:35:02.730\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.259441. 
Results saved to '/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/held_out/0.parquet'.\u001b[0m\n", + "[2024-12-14 23:35:02,731][HYDRA] \t#1 : data.shard=train/0\n", + "\u001b[32m2024-12-14 23:35:02.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/storage/nassim/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/storage/nassim/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.842\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.842\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", "root: ${oc.env:MEDS_ROOT_DIR}/data\n", @@ -519,37 +530,37 @@ "path: ${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.609\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. 
Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 23:35:02.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.885\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.905\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.932\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.932\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.935\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┗━━ input.start\n", "┗━━ gap.end\n", " ┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 47,976 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.805\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 2 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 37 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 
17:33:48.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 36 valid rows returned corresponding to 27 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.854\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. 
For now, we are dropping them.\n", + "\u001b[32m2024-12-14 23:35:02.935\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.935\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 47,976 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:02.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 2 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", + 
"\u001b[32m2024-12-14 23:35:03.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 37 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.224\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 36 valid rows returned corresponding to 27 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.224\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.224\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.227\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -557,14 +568,14 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.858\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.267562. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/train/0.parquet'.\u001b[0m\n", - "[2024-12-14 17:33:48,859][HYDRA] \t#2 : data.shard=tuning/0\n", - "\u001b[32m2024-12-14 17:33:48.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.932\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/Users/sim/Documents/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", + "\u001b[32m2024-12-14 23:35:03.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.409892. 
Results saved to '/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/train/0.parquet'.\u001b[0m\n", + "[2024-12-14 23:35:03,232][HYDRA] \t#2 : data.shard=tuning/0\n", + "\u001b[32m2024-12-14 23:35:03.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mLoading config from '/storage/nassim/projects/MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m151\u001b[0m - \u001b[1mOverriding predicates and/or demographics from '/storage/nassim/projects/MEDS-DEV/src/MEDS_DEV/datasets/MIMIC-IV/predicates.yaml'\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1341\u001b[0m - \u001b[1mParsing windows...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1350\u001b[0m - \u001b[1mParsing trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.config\u001b[0m:\u001b[36mload\u001b[0m:\u001b[36m1392\u001b[0m - \u001b[1mParsing predicates...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.344\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m159\u001b[0m - \u001b[1mAttempting to get predicates dataframe given:\n", "standard: meds\n", "ts_format: '%m/%d/%Y %H:%M'\n", "root: ${oc.env:MEDS_ROOT_DIR}/data\n", @@ -572,37 +583,37 @@ "path: ${data.root}/${data.shard}.parquet\n", "_prefix: /${data.shard}\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.949\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.968\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. 
Generating derived predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", + "\u001b[32m2024-12-14 23:35:03.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m269\u001b[0m - \u001b[1mLoading MEDS data...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m273\u001b[0m - \u001b[1mGenerating plain predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_admission'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.375\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'icu_discharge'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.377\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m277\u001b[0m - \u001b[1mAdded predicate column 'death'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.377\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mgenerate_plain_predicates_from_meds\u001b[0m:\u001b[36m280\u001b[0m - \u001b[1mCleaning up predicates dataframe...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m703\u001b[0m - \u001b[1mLoaded plain predicates. Generating derived predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m717\u001b[0m - \u001b[1mAdded predicate column 'discharge_or_death'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.predicates\u001b[0m:\u001b[36mget_predicates_df\u001b[0m:\u001b[36m724\u001b[0m - \u001b[1mGenerating special predicate columns...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m76\u001b[0m - \u001b[1mChecking if '(subject_id, timestamp)' columns are unique...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.utils\u001b[0m:\u001b[36mlog_tree\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1m\n", "trigger\n", "┣━━ input.end\n", "┃ ┗━━ input.start\n", "┗━━ gap.end\n", " ┗━━ target.end\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.970\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 23,030 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:48.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:49.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:49.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:49.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 12 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", - "\u001b[32m2024-12-14 
17:33:49.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:49.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 22 valid rows returned corresponding to 18 subjects.\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:49.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:49.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:49.085\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. 
For now, we are dropping them.\n", + "\u001b[32m2024-12-14 23:35:03.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m85\u001b[0m - \u001b[1mBeginning query...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m92\u001b[0m - \u001b[1mNo static variable criteria specified, removing all rows with null timestamps...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mIdentifying possible trigger nodes based on the specified trigger event...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 23,030 rows as they failed to satisfy '1 <= icu_admission <= None'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'input.start'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'gap.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 0 rows as they failed to satisfy 'None <= icu_admission <= 0'.\u001b[0m\n", + 
"\u001b[32m2024-12-14 23:35:03.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.constraints\u001b[0m:\u001b[36mcheck_constraints\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mExcluding 12 rows as they failed to satisfy 'None <= discharge_or_death <= 0'.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.512\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.extract_subtree\u001b[0m:\u001b[36mextract_subtree\u001b[0m:\u001b[36m252\u001b[0m - \u001b[1mSummarizing subtree rooted at 'target.end'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mDone. 22 valid rows returned corresponding to 18 subjects.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m129\u001b[0m - \u001b[1mExtracting label 'death' from window 'target'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.query\u001b[0m:\u001b[36mquery\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mSetting index timestamp as 'end' of window 'input'...\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:03.569\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mget_and_validate_label_schema\u001b[0m:\u001b[36m114\u001b[0m - \u001b[33m\u001b[1mOutput contains columns that are not valid MEDS label columns. For now, we are dropping them.\n", "If you need these columns, please comment on https://github.com/justin13601/ACES/issues/97\n", "Columns:\n", " - trigger\n", @@ -610,7 +621,7 @@ " - input.start_summary\n", " - gap.end_summary\n", " - target.end_summary\u001b[0m\n", - "\u001b[32m2024-12-14 17:33:49.089\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.157569. 
Results saved to '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/tuning/0.parquet'.\u001b[0m\n" + "\u001b[32m2024-12-14 23:35:03.574\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36maces.__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m191\u001b[0m - \u001b[1mCompleted in 0:00:00.252108. Results saved to '/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/tuning/0.parquet'.\u001b[0m\n" ] } ], @@ -621,14 +632,14 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/**/*.parquet\n" + "/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds/task_labels/mortality/in_icu/first_24h/**/*.parquet\n" ] } ], @@ -638,7 +649,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -693,7 +704,7 @@ "└────────────┴───────────────────┴───────────────┴───────────────┴─────────────┴───────────────────┘" ] }, - "execution_count": 8, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -726,7 +737,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "dev", + "display_name": "meds_demo", "language": "python", "name": "python3" }, diff --git a/demo/extract_meds_data.ipynb b/demo/extract_meds_data.ipynb index ac77546..f65f39d 100644 --- a/demo/extract_meds_data.ipynb +++ b/demo/extract_meds_data.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 36, + "execution_count": 1, "metadata": { "id": "ikPVQZOnPcI0" }, @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 2, "metadata": { "collapsed": true, "id": "rjqK4CuRPfnE" @@ -24,7 +24,7 @@ "name": "stdout", "output_type": "stream", "text": [ - 
"/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/\n" + "/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/\n" ] } ], @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -65,15 +65,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "Cloning into '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tmp'...\n", + "Cloning into '/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tmp'...\n", "remote: Enumerating objects: 144, done.\u001b[K\n", "remote: Counting objects: 100% (144/144), done.\u001b[K\n", "remote: Compressing objects: 100% (129/129), done.\u001b[K\n", "remote: Total 144 (delta 22), reused 70 (delta 7), pack-reused 0 (from 0)\u001b[K\n", - "Receiving objects: 100% (144/144), 211.41 KiB | 571.00 KiB/s, done.\n", + "Receiving objects: 100% (144/144), 211.41 KiB | 7.05 MiB/s, done.\n", "Resolving deltas: 100% (22/22), done.\n", - "usage: cp [-R [-H | -L | -P]] [-fi | -n] [-aclpSsvXx] source_file target_file\n", - " cp [-R [-H | -L | -P]] [-fi | -n] [-aclpSsvXx] source_file ... 
target_directory\n" + "cp: missing destination file operand after '/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//MIMIC-IV_Example/'\n", + "Try 'cp --help' for more information.\n" ] } ], @@ -87,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -100,225 +100,225 @@ "name": "stdout", "output_type": "stream", "text": [ - "--2024-12-14 17:15:14-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/d_labitems_to_loinc.csv\n", + "--2024-12-14 23:31:59-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/d_labitems_to_loinc.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 361048 (353K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/d_labitems_to_loinc.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/d_labitems_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 352.59K 1.04MB/s in 0.3s \n", + "/storage/nassim/pro 100%[===================>] 352.59K --.-KB/s in 0.02s \n", "\n", - "2024-12-14 17:15:14 (1.04 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/d_labitems_to_loinc.csv’ saved [361048/361048]\n", + "2024-12-14 23:31:59 (18.7 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/d_labitems_to_loinc.csv’ saved [361048/361048]\n", "\n", - "--2024-12-14 17:15:15-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/d_labitems_to_loinc.csv\n", + "--2024-12-14 23:32:00-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/d_labitems_to_loinc.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 361048 (353K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//d_labitems_to_loinc.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//d_labitems_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 352.59K 561KB/s in 0.6s \n", + "/storage/nassim/pro 100%[===================>] 352.59K --.-KB/s in 0.02s \n", "\n", - "2024-12-14 17:15:15 (561 KB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//d_labitems_to_loinc.csv’ saved [361048/361048]\n", + "2024-12-14 23:32:00 (18.5 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//d_labitems_to_loinc.csv’ saved [361048/361048]\n", "\n", - "--2024-12-14 17:15:16-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/inputevents_to_rxnorm.csv\n", + "--2024-12-14 23:32:01-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/inputevents_to_rxnorm.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 79195 (77K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/inputevents_to_rxnorm.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/inputevents_to_rxnorm.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 77.34K 109KB/s in 0.7s \n", + "/storage/nassim/pro 100%[===================>] 77.34K --.-KB/s in 0.007s \n", "\n", - "2024-12-14 17:15:17 (109 KB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/inputevents_to_rxnorm.csv’ saved [79195/79195]\n", + "2024-12-14 23:32:01 (10.4 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/inputevents_to_rxnorm.csv’ saved [79195/79195]\n", "\n", - "--2024-12-14 17:15:17-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/inputevents_to_rxnorm.csv\n", + "--2024-12-14 23:32:01-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/inputevents_to_rxnorm.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 79195 (77K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//inputevents_to_rxnorm.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//inputevents_to_rxnorm.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 77.34K --.-KB/s in 0.009s \n", + "/storage/nassim/pro 100%[===================>] 77.34K --.-KB/s in 0.008s \n", "\n", - "2024-12-14 17:15:17 (8.59 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//inputevents_to_rxnorm.csv’ saved [79195/79195]\n", + "2024-12-14 23:32:01 (9.87 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//inputevents_to_rxnorm.csv’ saved [79195/79195]\n", "\n", - "--2024-12-14 17:15:18-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/lab_itemid_to_loinc.csv\n", + "--2024-12-14 23:32:02-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/lab_itemid_to_loinc.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 79970 (78K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/lab_itemid_to_loinc.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/lab_itemid_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 78.10K --.-KB/s in 0.08s \n", + "/storage/nassim/pro 100%[===================>] 78.10K --.-KB/s in 0.008s \n", "\n", - "2024-12-14 17:15:18 (986 KB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/lab_itemid_to_loinc.csv’ saved [79970/79970]\n", + "2024-12-14 23:32:02 (9.83 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/lab_itemid_to_loinc.csv’ saved [79970/79970]\n", "\n", - "--2024-12-14 17:15:18-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/lab_itemid_to_loinc.csv\n", + "--2024-12-14 23:32:03-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/lab_itemid_to_loinc.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 79970 (78K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//lab_itemid_to_loinc.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//lab_itemid_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 78.10K --.-KB/s in 0.01s \n", + "/storage/nassim/pro 100%[===================>] 78.10K --.-KB/s in 0.007s \n", "\n", - "2024-12-14 17:15:18 (5.33 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//lab_itemid_to_loinc.csv’ saved [79970/79970]\n", + "2024-12-14 23:32:03 (10.4 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//lab_itemid_to_loinc.csv’ saved [79970/79970]\n", "\n", - "--2024-12-14 17:15:19-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_main.csv\n", + "--2024-12-14 23:32:04-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_main.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 34862 (34K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_main.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_main.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 34.04K --.-KB/s in 0.003s \n", + "/storage/nassim/pro 100%[===================>] 34.04K --.-KB/s in 0.002s \n", "\n", - "2024-12-14 17:15:19 (9.53 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_main.csv’ saved [34862/34862]\n", + "2024-12-14 23:32:04 (22.0 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_main.csv’ saved [34862/34862]\n", "\n", - "--2024-12-14 17:15:19-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_main.csv\n", + "--2024-12-14 23:32:05-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_main.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 34862 (34K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_main.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_main.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 34.04K --.-KB/s in 0.004s \n", + "/storage/nassim/pro 100%[===================>] 34.04K --.-KB/s in 0.002s \n", "\n", - "2024-12-14 17:15:19 (7.46 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_main.csv’ saved [34862/34862]\n", + "2024-12-14 23:32:05 (16.8 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_main.csv’ saved [34862/34862]\n", "\n", - "--2024-12-14 17:15:20-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_value.csv\n", + "--2024-12-14 23:32:06-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_value.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 5902 (5.8K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_value.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_value.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 5.76K --.-KB/s in 0.002s \n", + "/storage/nassim/pro 100%[===================>] 5.76K --.-KB/s in 0s \n", "\n", - "2024-12-14 17:15:20 (3.64 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_value.csv’ saved [5902/5902]\n", + "2024-12-14 23:32:06 (28.1 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/meas_chartevents_value.csv’ saved [5902/5902]\n", "\n", - "--2024-12-14 17:15:20-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_value.csv\n", + "--2024-12-14 23:32:06-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/meas_chartevents_value.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 5902 (5.8K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_value.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_value.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 5.76K --.-KB/s in 0.001s \n", + "/storage/nassim/pro 100%[===================>] 5.76K --.-KB/s in 0s \n", "\n", - "2024-12-14 17:15:20 (3.79 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_value.csv’ saved [5902/5902]\n", + "2024-12-14 23:32:07 (28.3 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//meas_chartevents_value.csv’ saved [5902/5902]\n", "\n", - "--2024-12-14 17:15:21-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/numerics-summary.csv\n", + "--2024-12-14 23:32:07-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/numerics-summary.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 32353 (32K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/numerics-summary.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/numerics-summary.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 31.59K --.-KB/s in 0.002s \n", + "/storage/nassim/pro 100%[===================>] 31.59K --.-KB/s in 0.003s \n", "\n", - "2024-12-14 17:15:21 (12.5 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/numerics-summary.csv’ saved [32353/32353]\n", + "2024-12-14 23:32:07 (12.1 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/numerics-summary.csv’ saved [32353/32353]\n", "\n", - "--2024-12-14 17:15:21-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/numerics-summary.csv\n", + "--2024-12-14 23:32:08-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/numerics-summary.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 32353 (32K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//numerics-summary.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//numerics-summary.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 31.59K --.-KB/s in 0.004s \n", + "/storage/nassim/pro 100%[===================>] 31.59K --.-KB/s in 0.002s \n", "\n", - "2024-12-14 17:15:21 (7.26 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//numerics-summary.csv’ saved [32353/32353]\n", + "2024-12-14 23:32:08 (16.8 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//numerics-summary.csv’ saved [32353/32353]\n", "\n", - "--2024-12-14 17:15:22-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/outputevents_to_loinc.csv\n", + "--2024-12-14 23:32:09-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/outputevents_to_loinc.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 34008 (33K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/outputevents_to_loinc.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/outputevents_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 33.21K --.-KB/s in 0.006s \n", + "/storage/nassim/pro 100%[===================>] 33.21K --.-KB/s in 0.002s \n", "\n", - "2024-12-14 17:15:22 (5.46 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/outputevents_to_loinc.csv’ saved [34008/34008]\n", + "2024-12-14 23:32:09 (17.6 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/outputevents_to_loinc.csv’ saved [34008/34008]\n", "\n", - "--2024-12-14 17:15:22-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/outputevents_to_loinc.csv\n", + "--2024-12-14 23:32:10-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/outputevents_to_loinc.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 34008 (33K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//outputevents_to_loinc.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//outputevents_to_loinc.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 33.21K --.-KB/s in 0.004s \n", + "/storage/nassim/pro 100%[===================>] 33.21K --.-KB/s in 0.002s \n", "\n", - "2024-12-14 17:15:23 (9.19 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//outputevents_to_loinc.csv’ saved [34008/34008]\n", + "2024-12-14 23:32:10 (16.6 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//outputevents_to_loinc.csv’ saved [34008/34008]\n", "\n", - "--2024-12-14 17:15:23-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_datetimeevents.csv\n", + "--2024-12-14 23:32:11-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_datetimeevents.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 25205 (25K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_datetimeevents.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_datetimeevents.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 24.61K --.-KB/s in 0.002s \n", + "/storage/nassim/pro 100%[===================>] 24.61K --.-KB/s in 0.001s \n", "\n", - "2024-12-14 17:15:23 (10.2 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_datetimeevents.csv’ saved [25205/25205]\n", + "2024-12-14 23:32:11 (31.5 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_datetimeevents.csv’ saved [25205/25205]\n", "\n", - "--2024-12-14 17:15:24-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_datetimeevents.csv\n", + "--2024-12-14 23:32:12-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_datetimeevents.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 25205 (25K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_datetimeevents.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_datetimeevents.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 24.61K --.-KB/s in 0.004s \n", + "/storage/nassim/pro 100%[===================>] 24.61K --.-KB/s in 0.001s \n", "\n", - "2024-12-14 17:15:24 (6.67 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_datetimeevents.csv’ saved [25205/25205]\n", + "2024-12-14 23:32:12 (22.3 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_datetimeevents.csv’ saved [25205/25205]\n", "\n", - "--2024-12-14 17:15:24-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_itemid.csv\n", + "--2024-12-14 23:32:13-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_itemid.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 21414 (21K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_itemid.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_itemid.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 20.91K --.-KB/s in 0.003s \n", + "/storage/nassim/pro 100%[===================>] 20.91K --.-KB/s in 0.001s \n", "\n", - "2024-12-14 17:15:24 (8.16 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_itemid.csv’ saved [21414/21414]\n", + "2024-12-14 23:32:13 (30.5 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/proc_itemid.csv’ saved [21414/21414]\n", "\n", - "--2024-12-14 17:15:24-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_itemid.csv\n", + "--2024-12-14 23:32:14-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/proc_itemid.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 21414 (21K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_itemid.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_itemid.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 20.91K --.-KB/s in 0.002s \n", + "/storage/nassim/pro 100%[===================>] 20.91K --.-KB/s in 0.001s \n", "\n", - "2024-12-14 17:15:25 (8.29 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_itemid.csv’ saved [21414/21414]\n", + "2024-12-14 23:32:14 (25.3 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//proc_itemid.csv’ saved [21414/21414]\n", "\n", - "--2024-12-14 17:15:25-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/waveforms-summary.csv\n", + "--2024-12-14 23:32:15-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/waveforms-summary.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 5743 (5.6K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/waveforms-summary.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/waveforms-summary.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 5.61K --.-KB/s in 0.001s \n", + "/storage/nassim/pro 100%[===================>] 5.61K --.-KB/s in 0s \n", "\n", - "2024-12-14 17:15:25 (4.44 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/waveforms-summary.csv’ saved [5743/5743]\n", + "2024-12-14 23:32:15 (27.8 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/waveforms-summary.csv’ saved [5743/5743]\n", "\n", - "--2024-12-14 17:15:25-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/waveforms-summary.csv\n", + "--2024-12-14 23:32:16-- https://raw.githubusercontent.com/MIT-LCP/mimic-code/v2.4.0/mimic-iv/concepts/concept_map/waveforms-summary.csv\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 5743 (5.6K) [text/plain]\n", - "Saving to: ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//waveforms-summary.csv’\n", + "Saving to: ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//waveforms-summary.csv’\n", "\n", - "/Users/sim/Document 100%[===================>] 5.61K --.-KB/s in 0s \n", + "/storage/nassim/pro 100%[===================>] 5.61K --.-KB/s in 0s \n", "\n", - "2024-12-14 17:15:25 (53.7 MB/s) - ‘/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//waveforms-summary.csv’ saved [5743/5743]\n", + "2024-12-14 23:32:16 (28.1 MB/s) - ‘/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds//waveforms-summary.csv’ saved [5743/5743]\n", "\n" ] } @@ -351,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -360,7 +360,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -373,100 +373,100 @@ "name": "stdout", "output_type": "stream", "text": [ - "/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//MIMIC-IV_Example\n", + "/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//MIMIC-IV_Example\n", "Unsetting SLURM_CPU_BIND in case you're running this on a slurm interactive node with slurm parallelism\n", "Setting DO_UNZIP=true\n", - "Unzipping csv.gz files matching /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/*/*.csv.gz.\n", + "Unzipping csv.gz files matching /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//raw_data/mimic-iv-demo/2.2/*/*.csv.gz.\n", "Running pre-MEDS conversion.\n", - "\u001b[32m2024-12-14 17:15:27.980\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound 
file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe_detail.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/poe_detail: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe_detail.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe_detail.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.981\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/provider.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/provider: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/provider.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/provider.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.982\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/pharmacy: Symlinking 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m243\u001b[0m - \u001b[1mSkipping hosp/index @ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/index.html as no compatible dataframe file was found.\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.982\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/emar: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.983\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/microbiologyevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/microbiologyevents: Symlinking 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/microbiologyevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/microbiologyevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.984\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/labevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.984\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/admissions: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.985\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_labitems.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/d_labitems: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_labitems.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_labitems.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.985\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/prescriptions.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/prescriptions: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/prescriptions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/prescriptions.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.986\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.987\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/procedures_icd: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.987\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/poe: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.987\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/d_hcpcs: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.988\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/omr: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.989\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/transfers: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.989\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/diagnoses_icd.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.989\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/services.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/services: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/services.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/services.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.990\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/hcpcsevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.991\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/drgcodes.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.991\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/patients.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.991\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_diagnoses.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.992\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_procedures.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.992\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar_detail.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/emar_detail: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar_detail.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar_detail.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m243\u001b[0m - \u001b[1mSkipping icu/index @ 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/index.html as no compatible dataframe file was found.\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.993\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/d_items.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/d_items: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/d_items.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/d_items.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.994\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/procedureevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.995\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/inputevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.995\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/datetimeevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/datetimeevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/datetimeevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/datetimeevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.996\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/ingredientevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/ingredientevents: Symlinking 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/ingredientevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/ingredientevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.996\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/chartevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.997\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/caregiver.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/caregiver: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/caregiver.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/caregiver.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.998\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - 
\u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/outputevents: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.998\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/icustays: Symlinking /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.999\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:27.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m300\u001b[0m - \u001b[1mLoading 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv for manipulating other dataframes...\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m305\u001b[0m - \u001b[1m Loaded in 0:00:00.001182\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/patients...\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/patients.csv...\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000155\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet in 0:00:00.061956\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/drgcodes...\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/drgcodes.csv...\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.063\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000196\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet in 0:00:00.004755\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/diagnoses_icd...\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/diagnoses_icd.csv...\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000183\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet in 0:00:00.005889\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.073\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_diagnoses.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m334\u001b[0m - 
\u001b[1mProcessing hosp/d_icd_diagnoses...\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.161\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet in 0:00:00.087207\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.161\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_procedures.csv\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.161\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m334\u001b[0m - \u001b[1mProcessing hosp/d_icd_procedures...\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1m Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet in 0:00:00.053073\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mDone! 
All dataframes processed and written to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.925\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar_detail.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/emar_detail: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar_detail.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar_detail.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.927\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/pharmacy: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.928\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/drgcodes.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.928\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/provider.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/provider: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/provider.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/provider.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.929\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_diagnoses.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.930\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/omr: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv\u001b[0m\n", + 
"\u001b[32m2024-12-14 23:32:19.931\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_procedures.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.931\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/microbiologyevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.932\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/microbiologyevents: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/microbiologyevents.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/microbiologyevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.932\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/services.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.933\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/services: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/services.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/services.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.933\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m243\u001b[0m - \u001b[1mSkipping hosp/index @ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/index.html as no compatible dataframe file was found.\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.934\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.934\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/labevents: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.935\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.935\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/d_hcpcs: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.936\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - 
\u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/poe: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.937\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_labitems.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/d_labitems: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_labitems.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_labitems.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.938\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/prescriptions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.938\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/prescriptions: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/prescriptions.csv to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/prescriptions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.939\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/hcpcsevents: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.940\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/diagnoses_icd.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.940\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.941\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/emar: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv\u001b[0m\n", + 
"\u001b[32m2024-12-14 23:32:19.941\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe_detail.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/poe_detail: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/poe_detail.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe_detail.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.942\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/patients.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.943\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/procedures_icd: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.944\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.944\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/transfers: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.945\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for hosp/admissions: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.946\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/d_items.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/d_items: 
Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/d_items.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/d_items.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.947\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/procedureevents: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m243\u001b[0m - \u001b[1mSkipping icu/index @ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/index.html as no compatible dataframe file was found.\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.948\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.949\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/outputevents: Symlinking 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.949\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/datetimeevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/datetimeevents: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/datetimeevents.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/datetimeevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.950\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/chartevents: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.951\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.952\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/inputevents: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.952\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/caregiver.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/caregiver: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/caregiver.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/caregiver.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.953\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/ingredientevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.954\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/ingredientevents: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/ingredientevents.csv to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/ingredientevents.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.954\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.955\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mNo function needed for icu/icustays: Symlinking /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.955\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.955\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m300\u001b[0m - \u001b[1mLoading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv for manipulating other dataframes...\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m305\u001b[0m - \u001b[1m Loaded in 0:00:00.001028\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/patients...\u001b[0m\n", + "\u001b[32m2024-12-14 
23:32:19.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/patients.csv...\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:19.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000307\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:20.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet in 0:00:00.058129\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:20.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ hosp/diagnoses_icd...\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:20.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/diagnoses_icd.csv...\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:20.016\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000431\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:20.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet in 0:00:00.046532\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:20.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m311\u001b[0m - \u001b[1m Processing dependent df @ 
hosp/drgcodes...\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:20.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1m Loading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/drgcodes.csv...\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:20.063\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m317\u001b[0m - \u001b[1m Loaded in 0:00:00.000474\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:20.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m320\u001b[0m - \u001b[1m Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet in 0:00:00.052418\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:20.115\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_diagnoses.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:20.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m334\u001b[0m - \u001b[1mProcessing hosp/d_icd_diagnoses...\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:20.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1m Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet in 0:00:00.295349\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:20.411\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_transforms.extract.utils\u001b[0m:\u001b[36mget_supported_fp\u001b[0m:\u001b[36m126\u001b[0m - \u001b[34m\u001b[1mFound file: 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_icd_procedures.csv\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:20.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m334\u001b[0m - \u001b[1mProcessing hosp/d_icd_procedures...\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:20.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1m Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet in 0:00:00.311290\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:20.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mDone! All dataframes processed and written to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds\u001b[0m\n", "Setting N_WORKERS to 1 to avoid issues with the runners.\n", "Running extraction pipeline.\n", - "\u001b[32m2024-12-14 17:15:28.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: shard_events\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:28.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-shard_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=shard_events\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:31.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 23:32:21.421\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: shard_events\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:21.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-shard_events --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=shard_events\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:27.581\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:31.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 17:15:29.323 | INFO | MEDS_transforms.extract.shard_events:main:330 - Running with config:\n", + "\u001b[32m2024-12-14 23:32:27.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 23:32:22.094 | INFO | MEDS_transforms.extract.shard_events:main:330 - Running with config:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", @@ -540,374 +540,374 @@ "Stage config:\n", "row_chunksize: 200000000\n", "infer_schema_length: 999999999\n", - "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds/\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//pre_meds/\n", "is_metadata: false\n", - "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/metadata\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/metadata\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", "reducer_output_dir: null\n", "\n", - "2024-12-14 17:15:29.328 | INFO | MEDS_transforms.extract.shard_events:main:342 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml to identify needed columns.\n", - "2024-12-14 17:15:29.358 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.358 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.359 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/ingredientevents.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.360 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/prescriptions.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.360 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.360 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_labitems.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/numerics-summary.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/lab_itemid_to_loinc.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/microbiologyevents.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/provider.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 
17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/datetimeevents.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/waveforms-summary.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.361 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/meas_chartevents_value.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/services.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/d_items.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe_detail.csv as it is not 
specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/meas_chartevents_main.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.362 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.363 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/caregiver.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.363 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar_detail.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.363 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.363 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv as it is not specified in the event conversion configuration.\n", - "2024-12-14 17:15:29.364 | INFO | MEDS_transforms.extract.shard_events:main:368 - Starting event sub-sharding. 
Sub-sharding 16 files:\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv\n", - "2024-12-14 17:15:29.366 | INFO | 
MEDS_transforms.extract.shard_events:main:372 - Will read raw data from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/$IN_FILE.parquet and write sub-sharded data to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/$IN_FILE/$ROW_START-$ROW_END.parquet\n", - "2024-12-14 17:15:29.369 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients.\n", - "2024-12-14 17:15:29.369 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet to determine row count.\n", - "2024-12-14 17:15:29.371 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:15:29.371 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet as Parquet with kwargs:\n", + "2024-12-14 23:32:22.100 | INFO | MEDS_transforms.extract.shard_events:main:342 - Reading event conversion config from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml to identify needed columns.\n", + "2024-12-14 23:32:22.143 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.143 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet as it is not specified in the event 
conversion configuration.\n", + "2024-12-14 23:32:22.144 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/services.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.144 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/d_items.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.144 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/microbiologyevents.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.145 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe_detail.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.145 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/meas_chartevents_main.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.145 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/datetimeevents.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.146 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar_detail.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.146 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/poe.csv as it is not 
specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.147 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/ingredientevents.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.147 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/lab_itemid_to_loinc.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.147 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/meas_chartevents_value.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.147 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/waveforms-summary.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.148 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.148 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_labitems.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.148 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/caregiver.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.148 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.149 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.149 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/prescriptions.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.149 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/provider.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.149 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.149 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.150 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/numerics-summary.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.150 | WARNING | MEDS_transforms.extract.shard_events:main:356 - Skipping /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv as it is not specified in the event conversion configuration.\n", + "2024-12-14 23:32:22.152 | INFO | 
MEDS_transforms.extract.shard_events:main:368 - Starting event sub-sharding. Sub-sharding 16 files:\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv\n", + "2024-12-14 23:32:22.155 | INFO | 
MEDS_transforms.extract.shard_events:main:372 - Will read raw data from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/$IN_FILE.parquet and write sub-sharded data to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/$IN_FILE/$ROW_START-$ROW_END.parquet\n", + "2024-12-14 23:32:22.159 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients.\n", + "2024-12-14 23:32:22.159 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet to determine row count.\n", + "2024-12-14 23:32:22.162 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:32:22.162 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:15:29.372 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dod', 'gender', 'subject_id', 'year_of_birth']\n", - "2024-12-14 17:15:29.378 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dod, gender, subject_id, year_of_birth\n", - "2024-12-14 17:15:29.384 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 100 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet.\n", - "2024-12-14 17:15:29.384 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet into 1 row-chunks of size 
200000000.\n", - "2024-12-14 17:15:29.384 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet row-chunk [0-100) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet.\n", - "2024-12-14 17:15:29.387 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.387587. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:29.389 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet\n", - "2024-12-14 17:15:29.389 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:15:29.389 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet as Parquet with kwargs:\n", + "2024-12-14 23:32:22.163 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dod', 'gender', 'subject_id', 'year_of_birth']\n", + "2024-12-14 23:32:22.165 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dod, gender, subject_id, year_of_birth\n", + "2024-12-14 23:32:22.176 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 100 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet.\n", + "2024-12-14 23:32:22.176 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:32:22.176 | INFO | 
MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet row-chunk [0-100) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet.\n", + "2024-12-14 23:32:22.180 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:22.180275. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:22.182 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet\n", + "2024-12-14 23:32:22.182 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:32:22.182 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/patients.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:15:29.389 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dod', 'gender', 'subject_id', 'year_of_birth']\n", - "2024-12-14 17:15:29.389 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dod, gender, subject_id, year_of_birth\n", - "2024-12-14 17:15:29.389 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:29.390 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", - "2024-12-14 17:15:29.398 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011062\n", - "2024-12-14 17:15:29.398 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T17:15:29.387587.json\n", - "2024-12-14 17:15:29.401 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes.\n", - "2024-12-14 17:15:29.401 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet to determine row count.\n", - "2024-12-14 17:15:29.403 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:15:29.403 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet as Parquet with kwargs:\n", + "2024-12-14 23:32:22.182 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dod', 'gender', 'subject_id', 'year_of_birth']\n", + "2024-12-14 23:32:22.182 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dod, gender, subject_id, year_of_birth\n", + "2024-12-14 23:32:22.182 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:22.183 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 23:32:22.190 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - 
Succeeded in 0:00:00.010485\n", + "2024-12-14 23:32:22.191 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/.[0-100).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T23:32:22.180275.json\n", + "2024-12-14 23:32:22.199 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd.\n", + "2024-12-14 23:32:22.199 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet to determine row count.\n", + "2024-12-14 23:32:22.204 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:32:22.204 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:15:29.404 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['description', 'drg_code', 'drg_mortality', 'drg_severity', 'drg_type', 'hadm_discharge_time', 'hadm_id', 'subject_id']\n", - "2024-12-14 17:15:29.404 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, description, drg_code, drg_mortality, drg_severity, drg_type, hadm_discharge_time, hadm_id, subject_id\n", - "2024-12-14 17:15:29.404 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 454 rows from 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet.\n", - "2024-12-14 17:15:29.404 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:15:29.404 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet row-chunk [0-454) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet.\n", - "2024-12-14 17:15:29.407 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.407233. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:29.407 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet\n", - "2024-12-14 17:15:29.407 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:15:29.407 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet as Parquet with kwargs:\n", + "2024-12-14 23:32:22.204 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['hadm_discharge_time', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", + "2024-12-14 23:32:22.205 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, hadm_discharge_time, hadm_id, icd_code, icd_version, subject_id\n", + "2024-12-14 23:32:22.206 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 4506 rows from 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet.\n", + "2024-12-14 23:32:22.206 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:32:22.206 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet row-chunk [0-4506) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet.\n", + "2024-12-14 23:32:22.210 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:22.209931. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:22.210 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet\n", + "2024-12-14 23:32:22.210 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:32:22.210 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:15:29.407 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['description', 'drg_code', 'drg_mortality', 'drg_severity', 'drg_type', 'hadm_discharge_time', 'hadm_id', 'subject_id']\n", - "2024-12-14 17:15:29.407 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, description, drg_code, drg_mortality, drg_severity, drg_type, hadm_discharge_time, hadm_id, subject_id\n", - "2024-12-14 17:15:29.407 | 
INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:29.408 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 17:15:29.410 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003108\n", - "2024-12-14 17:15:29.410 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T17:15:29.407233.json\n", - "2024-12-14 17:15:29.412 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd.\n", - "2024-12-14 17:15:29.413 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv to determine row count.\n", - "2024-12-14 17:15:29.415 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv as CSV with kwargs:\n", + "2024-12-14 23:32:22.210 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['hadm_discharge_time', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", + "2024-12-14 23:32:22.210 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, 
hadm_discharge_time, hadm_id, icd_code, icd_version, subject_id\n", + "2024-12-14 23:32:22.210 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:22.211 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 23:32:22.216 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006606\n", + "2024-12-14 23:32:22.216 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T23:32:22.209931.json\n", + "2024-12-14 23:32:22.220 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents.\n", + "2024-12-14 23:32:22.221 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv to determine row count.\n", + "2024-12-14 23:32:22.225 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:29.415 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", - "2024-12-14 17:15:29.419 | DEBUG | 
MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, icd_code, icd_version, subject_id\n", - "2024-12-14 17:15:29.420 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 722 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv.\n", - "2024-12-14 17:15:29.420 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:15:29.420 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv row-chunk [0-722) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet.\n", - "2024-12-14 17:15:29.423 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.423035. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:29.423 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv\n", - "2024-12-14 17:15:29.423 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv as CSV with kwargs:\n", + "2024-12-14 23:32:22.225 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['endtime', 'hadm_id', 'itemid', 'starttime', 'stay_id', 'subject_id']\n", + "2024-12-14 23:32:22.238 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, endtime, hadm_id, itemid, starttime, stay_id, subject_id\n", + "2024-12-14 23:32:22.242 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1468 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv.\n", + "2024-12-14 23:32:22.242 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:32:22.242 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv row-chunk [0-1468) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet.\n", + "2024-12-14 23:32:22.247 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:22.246968. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:22.247 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv\n", + "2024-12-14 23:32:22.247 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:29.423 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", - "2024-12-14 17:15:29.423 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, icd_code, icd_version, subject_id\n", - "2024-12-14 17:15:29.423 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:29.424 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 17:15:29.427 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.004191\n", - "2024-12-14 17:15:29.427 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T17:15:29.423035.json\n", - "2024-12-14 17:15:29.429 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents.\n", - "2024-12-14 17:15:29.429 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv to determine row count.\n", - "2024-12-14 17:15:29.431 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv as CSV with kwargs:\n", + "2024-12-14 23:32:22.247 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['endtime', 'hadm_id', 'itemid', 'starttime', 'stay_id', 'subject_id']\n", + "2024-12-14 23:32:22.257 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, endtime, hadm_id, itemid, starttime, stay_id, subject_id\n", + "2024-12-14 23:32:22.257 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:22.257 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 23:32:22.288 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.041347\n", + "2024-12-14 23:32:22.288 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T23:32:22.246968.json\n", + "2024-12-14 23:32:22.295 | 
INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr.\n", + "2024-12-14 23:32:22.295 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv to determine row count.\n", + "2024-12-14 23:32:22.301 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:29.432 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valueuom']\n", - "2024-12-14 17:15:29.438 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valueuom\n", - "2024-12-14 17:15:29.439 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 9362 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv.\n", - "2024-12-14 17:15:29.439 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:15:29.439 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv row-chunk [0-9362) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet.\n", - "2024-12-14 17:15:29.442 | INFO 
| MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.442133. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:29.442 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv\n", - "2024-12-14 17:15:29.442 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv as CSV with kwargs:\n", + "2024-12-14 23:32:22.301 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'result_name', 'result_value', 'subject_id']\n", + "2024-12-14 23:32:22.303 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, result_name, result_value, subject_id\n", + "2024-12-14 23:32:22.305 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 2964 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv.\n", + "2024-12-14 23:32:22.305 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:32:22.305 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv row-chunk [0-2964) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet.\n", + "2024-12-14 23:32:22.309 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:22.309265. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:22.309 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv\n", + "2024-12-14 23:32:22.309 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:29.442 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valueuom']\n", - "2024-12-14 17:15:29.448 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valueuom\n", - "2024-12-14 17:15:29.449 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:29.449 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:29.470 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.028809\n", - "2024-12-14 17:15:29.471 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T17:15:29.442133.json\n", - "2024-12-14 17:15:29.473 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays.\n", - "2024-12-14 17:15:29.474 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv to determine row count.\n", - "2024-12-14 17:15:29.486 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv as CSV with kwargs:\n", + "2024-12-14 23:32:22.310 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'result_name', 'result_value', 'subject_id']\n", + "2024-12-14 23:32:22.312 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, result_name, result_value, subject_id\n", + "2024-12-14 23:32:22.313 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:22.313 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 23:32:22.326 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.017685\n", + "2024-12-14 23:32:22.327 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T23:32:22.309265.json\n", + "2024-12-14 23:32:22.337 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar.\n", + "2024-12-14 23:32:22.337 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv to determine row count.\n", + "2024-12-14 23:32:22.341 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:29.487 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['first_careunit', 'hadm_id', 'intime', 'last_careunit', 'outtime', 'stay_id', 'subject_id']\n", - "2024-12-14 17:15:29.489 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, first_careunit, hadm_id, intime, last_careunit, outtime, stay_id, subject_id\n", - "2024-12-14 17:15:29.490 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 140 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv.\n", - "2024-12-14 17:15:29.490 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:15:29.491 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv row-chunk [0-140) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet.\n", - "2024-12-14 17:15:29.498 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - 
Registered lock at 2024-12-14 17:15:29.498163. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:29.498 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv\n", - "2024-12-14 17:15:29.498 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv as CSV with kwargs:\n", + "2024-12-14 23:32:22.341 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'emar_id', 'emar_seq', 'event_txt', 'hadm_id', 'medication', 'subject_id']\n", + "2024-12-14 23:32:22.423 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, emar_id, emar_seq, event_txt, hadm_id, medication, subject_id\n", + "2024-12-14 23:32:22.433 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 35835 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv.\n", + "2024-12-14 23:32:22.433 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:32:22.433 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv row-chunk [0-35835) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet.\n", + "2024-12-14 23:32:22.438 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:22.438672. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:22.439 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv\n", + "2024-12-14 23:32:22.439 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:29.499 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['first_careunit', 'hadm_id', 'intime', 'last_careunit', 'outtime', 'stay_id', 'subject_id']\n", - "2024-12-14 17:15:29.499 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, first_careunit, hadm_id, intime, last_careunit, outtime, stay_id, subject_id\n", - "2024-12-14 17:15:29.499 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:29.500 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", - "2024-12-14 17:15:29.504 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005838\n", - "2024-12-14 17:15:29.504 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T17:15:29.498163.json\n", - "2024-12-14 17:15:29.506 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents.\n", - "2024-12-14 17:15:29.506 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv to determine row count.\n", - "2024-12-14 17:15:29.509 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv as CSV with kwargs:\n", + "2024-12-14 23:32:22.439 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'emar_id', 'emar_seq', 'event_txt', 'hadm_id', 'medication', 'subject_id']\n", + "2024-12-14 23:32:22.540 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, emar_id, emar_seq, event_txt, hadm_id, medication, subject_id\n", + "2024-12-14 23:32:22.540 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:22.541 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 23:32:22.926 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.487534\n", + "2024-12-14 23:32:22.926 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T23:32:22.438672.json\n", + 
"2024-12-14 23:32:22.933 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays.\n", + "2024-12-14 23:32:22.934 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv to determine row count.\n", + "2024-12-14 23:32:22.943 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:29.509 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['endtime', 'hadm_id', 'itemid', 'starttime', 'stay_id', 'subject_id']\n", - "2024-12-14 17:15:29.511 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, endtime, hadm_id, itemid, starttime, stay_id, subject_id\n", - "2024-12-14 17:15:29.512 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1468 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv.\n", - "2024-12-14 17:15:29.512 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:15:29.512 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv row-chunk [0-1468) to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet.\n", - "2024-12-14 17:15:29.515 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.515574. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:29.515 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/procedureevents.csv\n", - "2024-12-14 17:15:29.516 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/procedureevents.csv as CSV with kwargs:\n", + "2024-12-14 23:32:22.943 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['first_careunit', 'hadm_id', 'intime', 'last_careunit', 'outtime', 'stay_id', 'subject_id']\n", + "2024-12-14 23:32:22.945 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, first_careunit, hadm_id, intime, last_careunit, outtime, stay_id, subject_id\n", + "2024-12-14 23:32:22.945 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 140 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv.\n", + "2024-12-14 23:32:22.946 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:32:22.946 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv row-chunk [0-140) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet.\n", + "2024-12-14 
23:32:22.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:22.951056. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:22.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/icustays.csv\n", + "2024-12-14 23:32:22.952 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/icustays.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:29.516 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['endtime', 'hadm_id', 'itemid', 'starttime', 'stay_id', 'subject_id']\n", - "2024-12-14 17:15:29.518 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, endtime, hadm_id, itemid, starttime, stay_id, subject_id\n", - "2024-12-14 17:15:29.518 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:29.518 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 17:15:29.527 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011919\n", - "2024-12-14 17:15:29.527 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T17:15:29.515574.json\n", - "2024-12-14 17:15:29.530 | INFO | 
MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar.\n", - "2024-12-14 17:15:29.530 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv to determine row count.\n", - "2024-12-14 17:15:29.532 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv as CSV with kwargs:\n", + "2024-12-14 23:32:22.952 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['first_careunit', 'hadm_id', 'intime', 'last_careunit', 'outtime', 'stay_id', 'subject_id']\n", + "2024-12-14 23:32:22.953 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, first_careunit, hadm_id, intime, last_careunit, outtime, stay_id, subject_id\n", + "2024-12-14 23:32:22.953 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:22.953 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 23:32:22.958 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007794\n", + "2024-12-14 23:32:22.959 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/.[0-140).parquet_cache, but clearing lock at 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T23:32:22.951056.json\n", + "2024-12-14 23:32:22.964 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers.\n", + "2024-12-14 23:32:22.964 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv to determine row count.\n", + "2024-12-14 23:32:22.969 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:29.532 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'emar_id', 'emar_seq', 'event_txt', 'hadm_id', 'medication', 'subject_id']\n", - "2024-12-14 17:15:29.562 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, emar_id, emar_seq, event_txt, hadm_id, medication, subject_id\n", - "2024-12-14 17:15:29.565 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 35835 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv.\n", - "2024-12-14 17:15:29.566 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:15:29.566 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv row-chunk [0-35835) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet.\n", - "2024-12-14 17:15:29.568 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.568485. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:29.568 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/emar.csv\n", - "2024-12-14 17:15:29.568 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/emar.csv as CSV with kwargs:\n", + "2024-12-14 23:32:22.969 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['careunit', 'eventtype', 'hadm_id', 'intime', 'subject_id']\n", + "2024-12-14 23:32:22.973 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, careunit, eventtype, hadm_id, intime, subject_id\n", + "2024-12-14 23:32:22.974 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1190 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv.\n", + "2024-12-14 23:32:22.974 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:32:22.974 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv row-chunk [0-1190) to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet.\n", + "2024-12-14 23:32:22.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:22.978267. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:22.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv\n", + "2024-12-14 23:32:22.979 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:29.569 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'emar_id', 'emar_seq', 'event_txt', 'hadm_id', 'medication', 'subject_id']\n", - "2024-12-14 17:15:29.600 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, emar_id, emar_seq, event_txt, hadm_id, medication, subject_id\n", - "2024-12-14 17:15:29.600 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:29.600 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", - "2024-12-14 17:15:29.640 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.071693\n", - "2024-12-14 17:15:29.640 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/.[0-35835).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T17:15:29.568485.json\n", - "2024-12-14 17:15:29.643 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers.\n", - "2024-12-14 17:15:29.643 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv to determine row count.\n", - "2024-12-14 17:15:29.645 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv as CSV with kwargs:\n", + "2024-12-14 23:32:22.980 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['careunit', 'eventtype', 'hadm_id', 'intime', 'subject_id']\n", + "2024-12-14 23:32:22.982 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, careunit, eventtype, hadm_id, intime, subject_id\n", + "2024-12-14 23:32:22.982 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:22.983 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 23:32:22.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.018333\n", + "2024-12-14 23:32:22.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T23:32:22.978267.json\n", + "2024-12-14 23:32:23.004 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd.\n", + "2024-12-14 23:32:23.004 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv to determine row count.\n", + "2024-12-14 23:32:23.008 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:29.645 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['careunit', 'eventtype', 'hadm_id', 'intime', 'subject_id']\n", - "2024-12-14 17:15:29.646 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, careunit, eventtype, hadm_id, intime, subject_id\n", - "2024-12-14 17:15:29.647 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1190 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv.\n", - "2024-12-14 17:15:29.647 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:15:29.647 | INFO | 
MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv row-chunk [0-1190) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet.\n", - "2024-12-14 17:15:29.650 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.649856. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:29.650 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/transfers.csv\n", - "2024-12-14 17:15:29.650 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/transfers.csv as CSV with kwargs:\n", + "2024-12-14 23:32:23.008 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", + "2024-12-14 23:32:23.010 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, icd_code, icd_version, subject_id\n", + "2024-12-14 23:32:23.010 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 722 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv.\n", + "2024-12-14 23:32:23.010 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:32:23.011 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv row-chunk [0-722) to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet.\n", + "2024-12-14 23:32:23.014 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:23.014518. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:23.014 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/procedures_icd.csv\n", + "2024-12-14 23:32:23.015 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/procedures_icd.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:29.650 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['careunit', 'eventtype', 'hadm_id', 'intime', 'subject_id']\n", - "2024-12-14 17:15:29.651 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, careunit, eventtype, hadm_id, intime, subject_id\n", - "2024-12-14 17:15:29.651 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:29.651 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 17:15:29.655 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005833\n", - "2024-12-14 17:15:29.655 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T17:15:29.649856.json\n", - "2024-12-14 17:15:29.658 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents.\n", - "2024-12-14 17:15:29.658 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv to determine row count.\n", - "2024-12-14 17:15:29.660 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv as CSV with kwargs:\n", + "2024-12-14 23:32:23.015 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", + "2024-12-14 23:32:23.016 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, icd_code, icd_version, subject_id\n", + "2024-12-14 23:32:23.016 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:23.016 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 23:32:23.020 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006345\n", + "2024-12-14 23:32:23.020 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T23:32:23.014518.json\n", + "2024-12-14 23:32:23.024 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents.\n", + "2024-12-14 23:32:23.024 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv to determine row count.\n", + "2024-12-14 23:32:23.027 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:29.660 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'priority', 'subject_id', 'value', 'valuenum', 'valueuom']\n", - "2024-12-14 17:15:29.764 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, priority, subject_id, value, valuenum, valueuom\n", - "2024-12-14 17:15:29.771 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 107727 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv.\n", - "2024-12-14 17:15:29.771 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv into 1 row-chunks of size 
200000000.\n", - "2024-12-14 17:15:29.771 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv row-chunk [0-107727) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet.\n", - "2024-12-14 17:15:29.774 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:29.773914. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:29.774 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv\n", - "2024-12-14 17:15:29.774 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv as CSV with kwargs:\n", + "2024-12-14 23:32:23.027 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valueuom']\n", + "2024-12-14 23:32:23.046 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valueuom\n", + "2024-12-14 23:32:23.049 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 9362 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv.\n", + "2024-12-14 23:32:23.049 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:32:23.049 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv row-chunk [0-9362) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet.\n", + "2024-12-14 23:32:23.055 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:23.055396. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:23.056 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/outputevents.csv\n", + "2024-12-14 23:32:23.056 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/outputevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:29.774 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'priority', 'subject_id', 'value', 'valuenum', 'valueuom']\n", - "2024-12-14 17:15:29.877 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, priority, subject_id, value, valuenum, valueuom\n", - "2024-12-14 17:15:29.877 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:29.878 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 17:15:29.928 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.154632\n", - "2024-12-14 17:15:29.928 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T17:15:29.773914.json\n", - "2024-12-14 17:15:29.931 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents.\n", - "2024-12-14 17:15:29.931 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv to determine row count.\n", - "2024-12-14 17:15:29.933 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv as CSV with kwargs:\n", + "2024-12-14 23:32:23.056 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valueuom']\n", + "2024-12-14 23:32:23.065 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valueuom\n", + "2024-12-14 23:32:23.065 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:23.065 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:23.134 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.079348\n", + "2024-12-14 23:32:23.134 | 
INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T23:32:23.055396.json\n", + "2024-12-14 23:32:23.139 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents.\n", + "2024-12-14 23:32:23.139 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv to determine row count.\n", + "2024-12-14 23:32:23.143 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:29.933 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valuenum', 'valueuom']\n", - "2024-12-14 17:15:30.414 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valuenum, valueuom\n", - "2024-12-14 17:15:30.444 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 668862 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv.\n", - "2024-12-14 17:15:30.444 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:15:30.444 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv row-chunk [0-668862) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet.\n", - "2024-12-14 17:15:30.448 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:30.447796. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:30.448 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv\n", - "2024-12-14 17:15:30.448 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv as CSV with kwargs:\n", + "2024-12-14 23:32:23.143 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valuenum', 'valueuom']\n", + "2024-12-14 23:32:24.054 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valuenum, valueuom\n", + "2024-12-14 23:32:24.085 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 668862 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv.\n", + "2024-12-14 23:32:24.085 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv into 1 row-chunks of size 
200000000.\n", + "2024-12-14 23:32:24.085 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv row-chunk [0-668862) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet.\n", + "2024-12-14 23:32:24.090 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:24.090621. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:24.091 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/chartevents.csv\n", + "2024-12-14 23:32:24.091 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/chartevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:30.448 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valuenum', 'valueuom']\n", - "2024-12-14 17:15:30.923 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valuenum, valueuom\n", - "2024-12-14 17:15:30.923 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:30.923 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 17:15:31.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.664018\n", - "2024-12-14 17:15:31.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - 
Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T17:15:30.447796.json\n", - "2024-12-14 17:15:31.114 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents.\n", - "2024-12-14 17:15:31.115 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv to determine row count.\n", - "2024-12-14 17:15:31.117 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv as CSV with kwargs:\n", + "2024-12-14 23:32:24.091 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'stay_id', 'subject_id', 'value', 'valuenum', 'valueuom']\n", + "2024-12-14 23:32:24.776 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, stay_id, subject_id, value, valuenum, valueuom\n", + "2024-12-14 23:32:24.776 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:24.777 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 23:32:25.383 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded 
in 0:00:01.293250\n", + "2024-12-14 23:32:25.384 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T23:32:24.090621.json\n", + "2024-12-14 23:32:25.388 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents.\n", + "2024-12-14 23:32:25.388 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv to determine row count.\n", + "2024-12-14 23:32:25.392 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:31.117 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'short_description', 'subject_id']\n", - "2024-12-14 17:15:31.117 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, short_description, subject_id\n", - "2024-12-14 17:15:31.117 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 61 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv.\n", - "2024-12-14 17:15:31.118 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:15:31.118 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv row-chunk [0-61) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet.\n", - "2024-12-14 17:15:31.120 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.120550. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:31.120 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv\n", - "2024-12-14 17:15:31.120 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv as CSV with kwargs:\n", + "2024-12-14 23:32:25.392 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'priority', 'subject_id', 'value', 'valuenum', 'valueuom']\n", + "2024-12-14 23:32:25.550 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, priority, subject_id, value, valuenum, valueuom\n", + "2024-12-14 23:32:25.563 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 107727 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv.\n", + "2024-12-14 23:32:25.563 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv into 1 row-chunks of size 
200000000.\n", + "2024-12-14 23:32:25.563 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv row-chunk [0-107727) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet.\n", + "2024-12-14 23:32:25.572 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:25.571922. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:25.572 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/labevents.csv\n", + "2024-12-14 23:32:25.572 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/labevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:31.121 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'short_description', 'subject_id']\n", - "2024-12-14 17:15:31.121 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, short_description, subject_id\n", - "2024-12-14 17:15:31.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:31.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 17:15:31.122 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.002100\n", - "2024-12-14 17:15:31.122 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T17:15:31.120550.json\n", - "2024-12-14 17:15:31.124 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions.\n", - "2024-12-14 17:15:31.125 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv to determine row count.\n", - "2024-12-14 17:15:31.127 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv as CSV with kwargs:\n", + "2024-12-14 23:32:25.573 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['charttime', 'hadm_id', 'itemid', 'priority', 'subject_id', 'value', 'valuenum', 'valueuom']\n", + "2024-12-14 23:32:25.716 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, charttime, hadm_id, itemid, priority, subject_id, value, valuenum, valueuom\n", + "2024-12-14 23:32:25.717 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:25.718 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 23:32:26.157 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.585378\n", + 
"2024-12-14 23:32:26.157 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T23:32:25.571922.json\n", + "2024-12-14 23:32:26.161 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes.\n", + "2024-12-14 23:32:26.162 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet to determine row count.\n", + "2024-12-14 23:32:26.165 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:32:26.166 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 23:32:26.166 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['description', 'drg_code', 'drg_mortality', 'drg_severity', 'drg_type', 'hadm_discharge_time', 'hadm_id', 'subject_id']\n", + "2024-12-14 23:32:26.166 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, description, drg_code, drg_mortality, drg_severity, drg_type, hadm_discharge_time, hadm_id, subject_id\n", + "2024-12-14 23:32:26.167 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 454 rows from 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet.\n", + "2024-12-14 23:32:26.167 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:32:26.167 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet row-chunk [0-454) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet.\n", + "2024-12-14 23:32:26.171 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:26.170941. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:26.171 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet\n", + "2024-12-14 23:32:26.171 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:32:26.171 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/drgcodes.parquet as Parquet with kwargs:\n", + ".\n", + "2024-12-14 23:32:26.171 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['description', 'drg_code', 'drg_mortality', 'drg_severity', 'drg_type', 'hadm_discharge_time', 'hadm_id', 'subject_id']\n", + "2024-12-14 23:32:26.171 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, description, drg_code, drg_mortality, drg_severity, drg_type, hadm_discharge_time, hadm_id, subject_id\n", + "2024-12-14 23:32:26.172 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:26.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 23:32:26.176 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005078\n", + "2024-12-14 23:32:26.176 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T23:32:26.170941.json\n", + "2024-12-14 23:32:26.179 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy.\n", + "2024-12-14 23:32:26.179 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv to determine row count.\n", + "2024-12-14 23:32:26.183 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:31.127 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admission_location', 'admission_type', 'admittime', 'discharge_location', 'dischtime', 'edouttime', 'edregtime', 'hadm_id', 'insurance', 'language', 'marital_status', 'race', 'subject_id']\n", - "2024-12-14 17:15:31.127 | DEBUG | 
MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admission_location, admission_type, admittime, discharge_location, dischtime, edouttime, edregtime, hadm_id, insurance, language, marital_status, race, subject_id\n", - "2024-12-14 17:15:31.128 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 275 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv.\n", - "2024-12-14 17:15:31.128 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:15:31.128 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv row-chunk [0-275) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet.\n", - "2024-12-14 17:15:31.130 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.130847. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:31.131 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv\n", - "2024-12-14 17:15:31.131 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv as CSV with kwargs:\n", + "2024-12-14 23:32:26.183 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['doses_per_24_hrs', 'frequency', 'medication', 'poe_id', 'route', 'starttime', 'stoptime', 'subject_id']\n", + "2024-12-14 23:32:26.223 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, doses_per_24_hrs, frequency, medication, poe_id, route, starttime, stoptime, subject_id\n", + "2024-12-14 23:32:26.230 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 15306 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv.\n", + "2024-12-14 23:32:26.230 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:32:26.230 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv row-chunk [0-15306) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet.\n", + "2024-12-14 23:32:26.238 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:26.238641. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:26.239 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv\n", + "2024-12-14 23:32:26.239 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:31.131 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admission_location', 'admission_type', 'admittime', 'discharge_location', 'dischtime', 'edouttime', 'edregtime', 'hadm_id', 'insurance', 'language', 'marital_status', 'race', 'subject_id']\n", - "2024-12-14 17:15:31.131 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admission_location, admission_type, admittime, discharge_location, dischtime, edouttime, edregtime, hadm_id, insurance, language, marital_status, race, subject_id\n", - "2024-12-14 17:15:31.131 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:31.131 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", - "2024-12-14 17:15:31.135 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.004670\n", - "2024-12-14 17:15:31.135 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/.[0-275).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T17:15:31.130847.json\n", - "2024-12-14 17:15:31.137 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents.\n", - "2024-12-14 17:15:31.138 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv to determine row count.\n", - "2024-12-14 17:15:31.141 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv as CSV with kwargs:\n", + "2024-12-14 23:32:26.239 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['doses_per_24_hrs', 'frequency', 'medication', 'poe_id', 'route', 'starttime', 'stoptime', 'subject_id']\n", + "2024-12-14 23:32:26.268 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, doses_per_24_hrs, frequency, medication, poe_id, route, starttime, stoptime, subject_id\n", + "2024-12-14 23:32:26.268 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:26.269 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 23:32:26.546 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.307435\n", + "2024-12-14 23:32:26.546 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T23:32:26.238641.json\n", + "2024-12-14 23:32:26.550 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents.\n", + "2024-12-14 23:32:26.550 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv to determine row count.\n", + "2024-12-14 23:32:26.554 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:31.141 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['amount', 'amountuom', 'endtime', 'hadm_id', 'itemid', 'linkorderid', 'ordercategorydescription', 'orderid', 'patientweight', 'rate', 'rateuom', 'starttime', 'statusdescription', 'stay_id', 'subject_id']\n", - "2024-12-14 17:15:31.177 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, amount, amountuom, endtime, hadm_id, itemid, linkorderid, ordercategorydescription, orderid, patientweight, rate, rateuom, starttime, statusdescription, stay_id, subject_id\n", - "2024-12-14 17:15:31.180 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 20404 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv.\n", - "2024-12-14 
17:15:31.180 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:15:31.180 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv row-chunk [0-20404) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet.\n", - "2024-12-14 17:15:31.183 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.183076. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:31.183 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv\n", - "2024-12-14 17:15:31.183 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv as CSV with kwargs:\n", + "2024-12-14 23:32:26.554 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'short_description', 'subject_id']\n", + "2024-12-14 23:32:26.555 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, hadm_id, short_description, subject_id\n", + "2024-12-14 23:32:26.556 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 61 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv.\n", + "2024-12-14 23:32:26.556 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv into 1 row-chunks 
of size 200000000.\n", + "2024-12-14 23:32:26.556 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv row-chunk [0-61) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet.\n", + "2024-12-14 23:32:26.559 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:26.559795. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:26.560 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/hcpcsevents.csv\n", + "2024-12-14 23:32:26.560 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/hcpcsevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:31.183 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['amount', 'amountuom', 'endtime', 'hadm_id', 'itemid', 'linkorderid', 'ordercategorydescription', 'orderid', 'patientweight', 'rate', 'rateuom', 'starttime', 'statusdescription', 'stay_id', 'subject_id']\n", - "2024-12-14 17:15:31.220 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, amount, amountuom, endtime, hadm_id, itemid, linkorderid, ordercategorydescription, orderid, patientweight, rate, rateuom, starttime, statusdescription, stay_id, subject_id\n", - "2024-12-14 17:15:31.220 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:31.220 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 17:15:31.240 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.057787\n", - "2024-12-14 17:15:31.240 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T17:15:31.183076.json\n", - "2024-12-14 17:15:31.244 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd.\n", - "2024-12-14 17:15:31.244 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet to determine row count.\n", - "2024-12-14 17:15:31.246 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:15:31.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet as Parquet with kwargs:\n", - ".\n", - "2024-12-14 17:15:31.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['hadm_discharge_time', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", - "2024-12-14 17:15:31.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, hadm_discharge_time, hadm_id, icd_code, 
icd_version, subject_id\n", - "2024-12-14 17:15:31.247 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 4506 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet.\n", - "2024-12-14 17:15:31.247 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:15:31.247 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet row-chunk [0-4506) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet.\n", - "2024-12-14 17:15:31.249 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.249669. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:31.249 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet\n", - "2024-12-14 17:15:31.250 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:15:31.250 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/diagnoses_icd.parquet as Parquet with kwargs:\n", - ".\n", - "2024-12-14 17:15:31.250 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['hadm_discharge_time', 'hadm_id', 'icd_code', 'icd_version', 'subject_id']\n", - "2024-12-14 17:15:31.250 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, hadm_discharge_time, 
hadm_id, icd_code, icd_version, subject_id\n", - "2024-12-14 17:15:31.250 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:31.250 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 17:15:31.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003216\n", - "2024-12-14 17:15:31.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T17:15:31.249669.json\n", - "2024-12-14 17:15:31.255 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy.\n", - "2024-12-14 17:15:31.255 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv to determine row count.\n", - "2024-12-14 17:15:31.257 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv as CSV with kwargs:\n", + "2024-12-14 23:32:26.560 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'hadm_id', 'short_description', 'subject_id']\n", + "2024-12-14 23:32:26.561 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - 
Returning df with columns: __row_idx__, chartdate, hadm_id, short_description, subject_id\n", + "2024-12-14 23:32:26.561 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:26.561 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 23:32:26.567 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007607\n", + "2024-12-14 23:32:26.568 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T23:32:26.559795.json\n", + "2024-12-14 23:32:26.578 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents.\n", + "2024-12-14 23:32:26.578 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv to determine row count.\n", + "2024-12-14 23:32:26.583 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:31.257 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['doses_per_24_hrs', 'frequency', 'medication', 'poe_id', 'route', 'starttime', 'stoptime', 'subject_id']\n", - 
"2024-12-14 17:15:31.278 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, doses_per_24_hrs, frequency, medication, poe_id, route, starttime, stoptime, subject_id\n", - "2024-12-14 17:15:31.280 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 15306 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv.\n", - "2024-12-14 17:15:31.280 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:15:31.280 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv row-chunk [0-15306) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet.\n", - "2024-12-14 17:15:31.283 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.283277. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:31.283 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/pharmacy.csv\n", - "2024-12-14 17:15:31.283 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/pharmacy.csv as CSV with kwargs:\n", + "2024-12-14 23:32:26.583 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['amount', 'amountuom', 'endtime', 'hadm_id', 'itemid', 'linkorderid', 'ordercategorydescription', 'orderid', 'patientweight', 'rate', 'rateuom', 'starttime', 'statusdescription', 'stay_id', 'subject_id']\n", + "2024-12-14 23:32:26.651 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, amount, amountuom, endtime, hadm_id, itemid, linkorderid, ordercategorydescription, orderid, patientweight, rate, rateuom, starttime, statusdescription, stay_id, subject_id\n", + "2024-12-14 23:32:26.659 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 20404 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv.\n", + "2024-12-14 23:32:26.659 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:32:26.659 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv row-chunk [0-20404) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet.\n", + "2024-12-14 23:32:26.667 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:26.666755. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:26.667 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/icu/inputevents.csv\n", + "2024-12-14 23:32:26.667 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/icu/inputevents.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:31.283 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['doses_per_24_hrs', 'frequency', 'medication', 'poe_id', 'route', 'starttime', 'stoptime', 'subject_id']\n", - "2024-12-14 17:15:31.304 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, doses_per_24_hrs, frequency, medication, poe_id, route, starttime, stoptime, subject_id\n", - "2024-12-14 17:15:31.304 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:31.305 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 17:15:31.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.054084\n", - "2024-12-14 17:15:31.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T17:15:31.283277.json\n", - "2024-12-14 17:15:31.340 | 
INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr.\n", - "2024-12-14 17:15:31.340 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv to determine row count.\n", - "2024-12-14 17:15:31.342 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv as CSV with kwargs:\n", + "2024-12-14 23:32:26.667 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['amount', 'amountuom', 'endtime', 'hadm_id', 'itemid', 'linkorderid', 'ordercategorydescription', 'orderid', 'patientweight', 'rate', 'rateuom', 'starttime', 'statusdescription', 'stay_id', 'subject_id']\n", + "2024-12-14 23:32:26.716 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, amount, amountuom, endtime, hadm_id, itemid, linkorderid, ordercategorydescription, orderid, patientweight, rate, rateuom, starttime, statusdescription, stay_id, subject_id\n", + "2024-12-14 23:32:26.716 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:26.717 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 23:32:27.397 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.730991\n", + "2024-12-14 23:32:27.398 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T23:32:26.666755.json\n", + "2024-12-14 23:32:27.402 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions.\n", + "2024-12-14 23:32:27.402 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv to determine row count.\n", + "2024-12-14 23:32:27.406 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:31.342 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'result_name', 'result_value', 'subject_id']\n", - "2024-12-14 17:15:31.343 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, result_name, result_value, subject_id\n", - "2024-12-14 17:15:31.344 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 2964 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv.\n", - "2024-12-14 17:15:31.344 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:15:31.344 | INFO | 
MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv row-chunk [0-2964) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet.\n", - "2024-12-14 17:15:31.346 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:31.346587. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:31.346 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/omr.csv\n", - "2024-12-14 17:15:31.346 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/omr.csv as CSV with kwargs:\n", + "2024-12-14 23:32:27.406 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admission_location', 'admission_type', 'admittime', 'discharge_location', 'dischtime', 'edouttime', 'edregtime', 'hadm_id', 'insurance', 'language', 'marital_status', 'race', 'subject_id']\n", + "2024-12-14 23:32:27.408 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admission_location, admission_type, admittime, discharge_location, dischtime, edouttime, edregtime, hadm_id, insurance, language, marital_status, race, subject_id\n", + "2024-12-14 23:32:27.409 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 275 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv.\n", + "2024-12-14 23:32:27.410 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv into 1 row-chunks of size 200000000.\n", + "2024-12-14 
23:32:27.410 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv row-chunk [0-275) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet.\n", + "2024-12-14 23:32:27.420 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:27.420408. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:27.421 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/admissions.csv\n", + "2024-12-14 23:32:27.421 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:143 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/admissions.csv as CSV with kwargs:\n", " * infer_schema_length=999999999.\n", - "2024-12-14 17:15:31.347 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['chartdate', 'result_name', 'result_value', 'subject_id']\n", - "2024-12-14 17:15:31.348 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, chartdate, result_name, result_value, subject_id\n", - "2024-12-14 17:15:31.348 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:31.348 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", - "2024-12-14 17:15:31.352 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005502\n", - "2024-12-14 17:15:31.352 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T17:15:31.346587.json\n", - "2024-12-14 17:15:31.352 | INFO | MEDS_transforms.extract.shard_events:main:430 - Sub-sharding completed in 0:00:01.985358\n", + "2024-12-14 23:32:27.421 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admission_location', 'admission_type', 'admittime', 'discharge_location', 'dischtime', 'edouttime', 'edregtime', 'hadm_id', 'insurance', 'language', 'marital_status', 'race', 'subject_id']\n", + "2024-12-14 23:32:27.423 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admission_location, admission_type, admittime, discharge_location, dischtime, edouttime, edregtime, hadm_id, insurance, language, marital_status, race, subject_id\n", + "2024-12-14 23:32:27.423 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:27.423 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 23:32:27.444 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024277\n", + "2024-12-14 23:32:27.445 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/.[0-275).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T23:32:27.420408.json\n", + "2024-12-14 23:32:27.445 | INFO | MEDS_transforms.extract.shard_events:main:430 - Sub-sharding 
completed in 0:00:05.290361\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:31.406\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: split_and_shard_subjects\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:31.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-split_and_shard_subjects --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=split_and_shard_subjects\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:32.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 23:32:27.590\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: split_and_shard_subjects\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:27.605\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-split_and_shard_subjects --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=split_and_shard_subjects\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:28.537\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:32.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m 
- \u001b[1mCommand error:\n", - "2024-12-14 17:15:31.960 | INFO | MEDS_transforms.utils:stage_init:73 - Running split_and_shard_subjects with the following configuration:\n", + "\u001b[32m2024-12-14 23:32:28.537\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 23:32:28.255 | INFO | MEDS_transforms.utils:stage_init:73 - Running split_and_shard_subjects with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -976,26 +976,26 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 17:15:31.974 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 23:32:28.275 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "is_metadata: true\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//metadata\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//metadata\n", "n_subjects_per_shard: 1000\n", "external_splits_json_fp: null\n", "split_fracs:\n", " train: 0.5\n", " tuning: 0.25\n", " held_out: 0.25\n", - "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", - "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/metadata\n", - "reducer_output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/metadata\n", + "reducer_output_dir: 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", "train_only: true\n", "\n", "Paths: (checkbox indicates if it exists)\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", - " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", - " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/metadata\n", - "2024-12-14 17:15:31.975 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:215 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml (needed for subject ID columns)\n", - "2024-12-14 17:15:32.007 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:219 - Event conversion config:\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", + " - metadata_input_dir: ❌ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/metadata\n", + "2024-12-14 23:32:28.276 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:215 - Reading event conversion config from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml (needed for subject ID columns)\n", + "2024-12-14 23:32:28.316 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:219 - Event conversion config:\n", "subject_id_col: subject_id\n", "hosp/admissions:\n", " ed_registration:\n", @@ -1318,53 +1318,53 @@ " valueuom: unitname\n", " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", "\n", - "2024-12-14 17:15:32.008 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/admissions files:\n", - " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", - "2024-12-14 17:15:32.009 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/diagnoses_icd files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 17:15:32.009 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/drgcodes files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 17:15:32.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/emar files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", - "2024-12-14 17:15:32.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/hcpcsevents files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 17:15:32.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/labevents files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 17:15:32.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/omr files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", - "2024-12-14 17:15:32.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/patients files:\n", - " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", - "2024-12-14 17:15:32.011 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/pharmacy files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 17:15:32.011 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/procedures_icd files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 17:15:32.011 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/transfers files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 17:15:32.011 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/icustays files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", - "2024-12-14 17:15:32.011 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/chartevents files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 17:15:32.012 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/procedureevents files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 17:15:32.012 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/inputevents files:\n", - " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 17:15:32.012 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/outputevents files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:32.012 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:239 - Joining all subject IDs from 16 dataframes\n", - "2024-12-14 17:15:32.052 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:247 - Found 100 unique subject IDs of type int64\n", - "2024-12-14 17:15:32.056 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:262 - Sharding and splitting subjects\n", - "2024-12-14 17:15:32.101 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split train/0 has 50 subjects.\n", - "2024-12-14 17:15:32.101 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split tuning/0 has 25 subjects.\n", - "2024-12-14 17:15:32.101 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split held_out/0 has 25 subjects.\n", - "2024-12-14 17:15:32.102 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:273 - Writing sharded subjects to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/.shards.json\n", - "2024-12-14 17:15:32.102 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:276 - Done writing sharded subjects\n", + "2024-12-14 23:32:28.317 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/admissions files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 23:32:28.319 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from 
hosp/diagnoses_icd files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 23:32:28.320 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/drgcodes files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 23:32:28.320 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/emar files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 23:32:28.321 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/hcpcsevents files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 23:32:28.321 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/labevents files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 23:32:28.321 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/omr files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 23:32:28.322 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/patients files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 23:32:28.322 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/pharmacy files:\n", + " - 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 23:32:28.323 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/procedures_icd files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 23:32:28.323 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from hosp/transfers files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 23:32:28.324 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/icustays files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 23:32:28.324 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/chartevents files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 23:32:28.325 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/procedureevents files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 23:32:28.325 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/inputevents files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 23:32:28.326 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from icu/outputevents files:\n", + " - 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:28.326 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:239 - Joining all subject IDs from 16 dataframes\n", + "2024-12-14 23:32:28.423 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:247 - Found 100 unique subject IDs of type int64\n", + "2024-12-14 23:32:28.428 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:262 - Sharding and splitting subjects\n", + "2024-12-14 23:32:28.448 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split train/0 has 50 subjects.\n", + "2024-12-14 23:32:28.448 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split tuning/0 has 25 subjects.\n", + "2024-12-14 23:32:28.448 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split held_out/0 has 25 subjects.\n", + "2024-12-14 23:32:28.449 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:273 - Writing sharded subjects to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/.shards.json\n", + "2024-12-14 23:32:28.449 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:276 - Done writing sharded subjects\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:32.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: convert_to_sharded_events\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:32.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-convert_to_sharded_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' 
stage=convert_to_sharded_events\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:33.739\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 23:32:28.539\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: convert_to_sharded_events\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:28.554\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-convert_to_sharded_events --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=convert_to_sharded_events\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:31.467\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:33.739\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 17:15:32.724 | INFO | MEDS_transforms.utils:stage_init:73 - Running convert_to_sharded_events with the following configuration:\n", + "\u001b[32m2024-12-14 23:32:31.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 23:32:29.252 | INFO | MEDS_transforms.utils:stage_init:73 - Running convert_to_sharded_events with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", @@ -1433,21 +1433,21 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 17:15:32.738 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 23:32:29.273 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "do_dedup_text_and_numeric: true\n", "is_metadata: false\n", - "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", - "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events\n", "reducer_output_dir: null\n", "\n", "Paths: (checkbox indicates if it exists)\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", - " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events\n", - " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", - "2024-12-14 17:15:32.739 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:769 - Starting event conversion.\n", - "2024-12-14 17:15:32.739 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:771 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml\n", - "2024-12-14 17:15:32.771 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:main:773 - Event conversion config:\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events\n", + " - metadata_input_dir: ❌ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", + "2024-12-14 23:32:29.275 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:769 - Starting event conversion.\n", + "2024-12-14 23:32:29.275 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:771 - Reading event conversion config from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml\n", + "2024-12-14 23:32:29.317 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:773 - Event conversion config:\n", "subject_id_col: subject_id\n", "hosp/admissions:\n", " ed_registration:\n", @@ -1770,742 +1770,742 @@ " valueuom: unitname\n", " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", "\n", - "2024-12-14 17:15:32.780 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 17:15:32.781 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.781478. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:32.783 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", - "2024-12-14 17:15:32.783 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:802: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + "2024-12-14 23:32:29.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 23:32:29.330 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.330428. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 23:32:29.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:802: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " typed_subjects = pl.Series(subjects, dtype=df.schema[input_subject_id_column])\n", - "2024-12-14 17:15:32.790 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", - "2024-12-14 17:15:32.794 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:468: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + "2024-12-14 23:32:29.335 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", + "2024-12-14 23:32:29.341 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:468: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " if col not in df.schema:\n", - "2024-12-14 17:15:32.796 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", - "2024-12-14 17:15:32.796 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", - "2024-12-14 17:15:32.796 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", - "2024-12-14 17:15:32.796 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth in possible formats %Y\n", - "2024-12-14 17:15:32.797 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.797 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", - "2024-12-14 17:15:32.797 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 17:15:32.797 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 17:15:32.797 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 17:15:32.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.044811\n", - "2024-12-14 17:15:32.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - 
Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T17:15:32.781478.json\n", - "2024-12-14 17:15:32.827 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 17:15:32.827 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.827805. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:32.828 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 17:15:32.828 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:32.828 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 17:15:32.832 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", - "2024-12-14 17:15:32.833 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 17:15:32.833 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 17:15:32.833 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:513: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + "2024-12-14 23:32:29.342 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", + "2024-12-14 23:32:29.342 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", + "2024-12-14 23:32:29.342 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:513: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " if v not in df.schema:\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:517: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:517: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " is_numeric = df.schema[v].is_numeric()\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:518: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:518: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " is_str = df.schema[v] == pl.Utf8\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:519: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:519: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " is_cat = isinstance(df.schema[v], pl.Categorical)\n", - "2024-12-14 17:15:32.833 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.833 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 17:15:32.838 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010302\n", - "2024-12-14 17:15:32.838 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T17:15:32.827805.json\n", - "2024-12-14 17:15:32.838 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 17:15:32.839 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.838982. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:32.839 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 17:15:32.839 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:32.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 17:15:32.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", - "2024-12-14 17:15:32.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", - "2024-12-14 17:15:32.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 17:15:32.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.843 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 17:15:32.845 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006657\n", - "2024-12-14 17:15:32.845 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T17:15:32.838982.json\n", - "2024-12-14 17:15:32.846 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 17:15:32.846 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.846632. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:32.846 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", - "2024-12-14 17:15:32.846 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:32.847 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", - "2024-12-14 17:15:32.850 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", - "2024-12-14 17:15:32.851 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", - "2024-12-14 17:15:32.851 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:32.851 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.851 | 
INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", - "2024-12-14 17:15:32.851 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", - "2024-12-14 17:15:32.851 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:32.852 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.852 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 17:15:32.855 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008629\n", - "2024-12-14 17:15:32.855 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T17:15:32.846632.json\n", - "2024-12-14 17:15:32.855 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 17:15:32.856 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered 
lock at 2024-12-14 17:15:32.856164. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:32.856 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 17:15:32.856 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:32.856 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 17:15:32.860 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", - "2024-12-14 17:15:32.860 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", - "2024-12-14 17:15:32.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", - "2024-12-14 17:15:32.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", - "2024-12-14 17:15:32.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:32.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.861 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 17:15:32.863 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007601\n", - "2024-12-14 17:15:32.863 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T17:15:32.856164.json\n", - "2024-12-14 17:15:32.864 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 17:15:32.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.865354. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:32.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 17:15:32.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:32.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", - "2024-12-14 17:15:32.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", - "2024-12-14 17:15:32.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:32.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d 
%H:%M:%S\n", - "2024-12-14 17:15:32.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", - "2024-12-14 17:15:32.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:32.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:32.873 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.873 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", - "2024-12-14 17:15:32.873 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:32.874 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.874 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 17:15:32.888 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.023607\n", - "2024-12-14 17:15:32.889 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T17:15:32.865354.json\n", - "2024-12-14 17:15:32.889 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 17:15:32.890 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.889940. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:32.890 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 17:15:32.890 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:32.890 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 17:15:32.894 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", - "2024-12-14 17:15:32.895 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 17:15:32.895 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 17:15:32.895 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:32.895 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.895 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 17:15:32.897 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007572\n", - "2024-12-14 17:15:32.897 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T17:15:32.889940.json\n", - "2024-12-14 17:15:32.898 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 17:15:32.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.898624. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:32.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 17:15:32.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:32.899 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", - "2024-12-14 17:15:32.903 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", - "2024-12-14 17:15:32.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:32.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 17:15:32.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:529: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + "2024-12-14 23:32:29.342 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.343 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 23:32:29.372 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.042348\n", + "2024-12-14 23:32:29.373 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T23:32:29.330428.json\n", + "2024-12-14 23:32:29.375 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 23:32:29.376 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.376691. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.377 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 23:32:29.377 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.378 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", + "2024-12-14 23:32:29.387 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", + "2024-12-14 23:32:29.388 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:29.388 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 23:32:29.388 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:529: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " case \"text_value\" if not df.schema[v] == pl.Utf8:\n", - "2024-12-14 17:15:32.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.905 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 17:15:32.915 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.017034\n", - "2024-12-14 17:15:32.915 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T17:15:32.898624.json\n", - "2024-12-14 17:15:32.916 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 17:15:32.917 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.917013. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:32.917 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 17:15:32.917 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:32.917 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 17:15:32.921 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", - "2024-12-14 17:15:32.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:32.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:32.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", - "2024-12-14 17:15:32.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:32.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:32.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.923 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 17:15:32.926 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009541\n", - "2024-12-14 17:15:32.926 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T17:15:32.917013.json\n", - "2024-12-14 17:15:32.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 17:15:32.927 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.927549. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:32.927 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 17:15:32.927 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:32.928 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 17:15:32.931 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", - "2024-12-14 17:15:32.932 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 17:15:32.932 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 17:15:32.932 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 17:15:32.932 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", - "2024-12-14 17:15:32.933 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 17:15:32.933 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 17:15:32.933 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 17:15:32.933 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 17:15:32.940 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013041\n", - "2024-12-14 17:15:32.940 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T17:15:32.927549.json\n", - "2024-12-14 17:15:32.941 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 17:15:32.941 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.941739. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:32.942 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", - "2024-12-14 17:15:32.942 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:32.942 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", - "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", - "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", - "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", - "2024-12-14 17:15:32.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
admission_location\n", - "2024-12-14 17:15:32.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_type\n", - "2024-12-14 17:15:32.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:32.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", - "2024-12-14 17:15:32.948 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", - "2024-12-14 17:15:32.948 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:32.948 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.948 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 17:15:32.955 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013267\n", - "2024-12-14 17:15:32.955 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T17:15:32.941739.json\n", - "2024-12-14 17:15:32.955 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 17:15:32.956 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.955912. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:32.956 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 17:15:32.956 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:32.956 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", - "2024-12-14 17:15:32.973 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", - "2024-12-14 17:15:32.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", - "2024-12-14 17:15:32.976 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", - "2024-12-14 17:15:32.977 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:32.978 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - 
Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 17:15:32.981 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.025516\n", - "2024-12-14 17:15:32.981 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T17:15:32.955912.json\n", - "2024-12-14 17:15:32.983 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 17:15:32.986 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:32.984685. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:32.987 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 17:15:32.987 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:32.987 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", - "2024-12-14 17:15:32.991 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", - "2024-12-14 17:15:32.992 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:32.992 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 17:15:32.992 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:32.992 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:32.992 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 17:15:33.040 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.056118\n", - "2024-12-14 17:15:33.040 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T17:15:32.984685.json\n", - "2024-12-14 17:15:33.041 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 17:15:33.041 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.041805. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.042 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", - "2024-12-14 17:15:33.042 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.042 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", - "2024-12-14 17:15:33.046 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", - "2024-12-14 17:15:33.046 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", - "2024-12-14 17:15:33.046 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 17:15:33.046 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 
- Source column 'col(result_value)' for event column text_value is always interpreted as a column name. Removing col() function call and setting source column to result_value.\n", - "2024-12-14 17:15:33.046 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", - "2024-12-14 17:15:33.047 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.047 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 17:15:33.049 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007480\n", - "2024-12-14 17:15:33.049 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T17:15:33.041805.json\n", - "2024-12-14 17:15:33.050 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:33.050 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.050716. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.051 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:33.051 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:33.055 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", - "2024-12-14 17:15:33.056 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:33.056 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 17:15:33.056 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.056 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.056 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:33.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008712\n", - "2024-12-14 17:15:33.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T17:15:33.050716.json\n", - "2024-12-14 17:15:33.060 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 17:15:33.060 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.060372. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.060 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", - "2024-12-14 17:15:33.060 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.061 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", - "2024-12-14 17:15:33.065 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", - "2024-12-14 17:15:33.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", - "2024-12-14 17:15:33.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 17:15:33.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column 
charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.066 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 17:15:33.071 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011269\n", - "2024-12-14 17:15:33.071 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T17:15:33.060372.json\n", - "2024-12-14 17:15:33.072 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 17:15:33.072 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.072660. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.072 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", - "2024-12-14 17:15:33.072 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.073 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", - "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", - "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", - "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", - "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", - "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth in possible formats %Y\n", - "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", - "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 17:15:33.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - 
Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 17:15:33.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 17:15:33.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007929\n", - "2024-12-14 17:15:33.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T17:15:33.072660.json\n", - "2024-12-14 17:15:33.081 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 17:15:33.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.081784. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.082 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 17:15:33.082 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.082 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 17:15:33.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", - "2024-12-14 17:15:33.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 17:15:33.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 17:15:33.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 17:15:33.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.087 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 17:15:33.089 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007410\n", - "2024-12-14 17:15:33.089 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T17:15:33.081784.json\n", - "2024-12-14 17:15:33.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 17:15:33.090 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.090034. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.090 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 17:15:33.090 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.090 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 17:15:33.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", - "2024-12-14 17:15:33.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", - "2024-12-14 17:15:33.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 17:15:33.095 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.095 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 17:15:33.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006395\n", - "2024-12-14 17:15:33.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T17:15:33.090034.json\n", - "2024-12-14 17:15:33.096 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 17:15:33.097 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.097353. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.097 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", - "2024-12-14 17:15:33.097 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.097 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", - "2024-12-14 17:15:33.101 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", - "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", - "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", - "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", - "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.102 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.103 | 
INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 17:15:33.105 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007910\n", - "2024-12-14 17:15:33.105 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T17:15:33.097353.json\n", - "2024-12-14 17:15:33.105 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 17:15:33.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.106087. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 17:15:33.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.106 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 17:15:33.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", - "2024-12-14 17:15:33.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", - "2024-12-14 17:15:33.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", - "2024-12-14 17:15:33.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", - "2024-12-14 17:15:33.111 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.111 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 17:15:33.113 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006962\n", - "2024-12-14 17:15:33.113 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T17:15:33.106087.json\n", - "2024-12-14 17:15:33.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 17:15:33.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.114649. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 17:15:33.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.115 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", - "2024-12-14 17:15:33.119 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", - "2024-12-14 17:15:33.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:33.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - 
"2024-12-14 17:15:33.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", - "2024-12-14 17:15:33.121 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:33.121 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.122 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.122 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", - "2024-12-14 17:15:33.122 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.122 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.122 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 17:15:33.134 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.020059\n", - "2024-12-14 17:15:33.134 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T17:15:33.114649.json\n", - "2024-12-14 17:15:33.135 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 17:15:33.135 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.135615. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.135 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 17:15:33.135 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.136 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 17:15:33.140 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", - "2024-12-14 17:15:33.140 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 17:15:33.140 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 17:15:33.140 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.140 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.140 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 17:15:33.142 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007186\n", - "2024-12-14 17:15:33.142 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T17:15:33.135615.json\n", - "2024-12-14 17:15:33.143 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 17:15:33.143 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.143737. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.143 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 17:15:33.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.144 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", - "2024-12-14 17:15:33.148 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", - "2024-12-14 17:15:33.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:33.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 17:15:33.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.149 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 17:15:33.163 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.020243\n", - "2024-12-14 17:15:33.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T17:15:33.143737.json\n", - "2024-12-14 17:15:33.165 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 17:15:33.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.165607. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 17:15:33.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.166 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 17:15:33.170 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", - "2024-12-14 17:15:33.171 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:33.171 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.171 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.171 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", - "2024-12-14 17:15:33.172 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:33.172 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.172 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 17:15:33.175 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009748\n", - "2024-12-14 17:15:33.175 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T17:15:33.165607.json\n", - "2024-12-14 17:15:33.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 17:15:33.176 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.176289. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.176 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 17:15:33.176 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 17:15:33.180 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", - "2024-12-14 17:15:33.181 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 17:15:33.181 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 17:15:33.181 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 17:15:33.181 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", - "2024-12-14 17:15:33.181 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 17:15:33.181 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 17:15:33.182 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 17:15:33.182 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 17:15:33.190 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.014067\n", - "2024-12-14 17:15:33.190 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T17:15:33.176289.json\n", - "2024-12-14 17:15:33.191 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 17:15:33.191 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.191592. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.191 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", - "2024-12-14 17:15:33.191 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", - "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", - "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", - "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", - "2024-12-14 17:15:33.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
admission_location\n", - "2024-12-14 17:15:33.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_type\n", - "2024-12-14 17:15:33.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", - "2024-12-14 17:15:33.198 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", - "2024-12-14 17:15:33.198 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.198 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.198 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 17:15:33.202 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011248\n", - "2024-12-14 17:15:33.202 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T17:15:33.191592.json\n", - "2024-12-14 17:15:33.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 17:15:33.203 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.203695. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.203 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 17:15:33.204 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", - "2024-12-14 17:15:33.207 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", - "2024-12-14 17:15:33.208 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", - "2024-12-14 17:15:33.208 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", - "2024-12-14 17:15:33.208 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.208 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering 
out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.208 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 17:15:33.210 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006506\n", - "2024-12-14 17:15:33.210 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T17:15:33.203695.json\n", - "2024-12-14 17:15:33.210 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 17:15:33.211 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.211185. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.211 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 17:15:33.211 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.211 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", - "2024-12-14 17:15:33.215 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", - "2024-12-14 17:15:33.216 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:33.216 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 17:15:33.216 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.216 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.216 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 17:15:33.286 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.075577\n", - "2024-12-14 17:15:33.286 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T17:15:33.211185.json\n", - "2024-12-14 17:15:33.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 17:15:33.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.287673. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", - "2024-12-14 17:15:33.288 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.288 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", - "2024-12-14 17:15:33.291 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", - "2024-12-14 17:15:33.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", - "2024-12-14 17:15:33.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 17:15:33.292 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - 
Source column 'col(result_value)' for event column text_value is always interpreted as a column name. Removing col() function call and setting source column to result_value.\n", - "2024-12-14 17:15:33.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", - "2024-12-14 17:15:33.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.292 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 17:15:33.294 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007073\n", - "2024-12-14 17:15:33.294 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T17:15:33.287673.json\n", - "2024-12-14 17:15:33.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:33.295 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.295797. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.296 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:33.296 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:33.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", - "2024-12-14 17:15:33.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:33.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 17:15:33.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.301 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:33.304 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008910\n", - "2024-12-14 17:15:33.304 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T17:15:33.295797.json\n", - "2024-12-14 17:15:33.305 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 17:15:33.305 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.305567. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.305 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", - "2024-12-14 17:15:33.305 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.306 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", - "2024-12-14 17:15:33.310 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", - "2024-12-14 17:15:33.311 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", - "2024-12-14 17:15:33.311 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 17:15:33.311 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column 
charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.311 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.311 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 17:15:33.318 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013312\n", - "2024-12-14 17:15:33.318 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T17:15:33.305567.json\n", - "2024-12-14 17:15:33.319 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 17:15:33.320 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.319931. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.320 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", - "2024-12-14 17:15:33.320 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.320 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", - "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", - "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", - "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", - "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", - "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth in possible formats %Y\n", - "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", - "2024-12-14 17:15:33.325 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 17:15:33.325 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - 
Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 17:15:33.325 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", - "2024-12-14 17:15:33.328 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008224\n", - "2024-12-14 17:15:33.328 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T17:15:33.319931.json\n", - "2024-12-14 17:15:33.328 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 17:15:33.329 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.329021. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.329 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 17:15:33.329 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 17:15:33.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", - "2024-12-14 17:15:33.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 17:15:33.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 17:15:33.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 17:15:33.334 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.334 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", - "2024-12-14 17:15:33.336 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006998\n", - "2024-12-14 17:15:33.336 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T17:15:33.329021.json\n", - "2024-12-14 17:15:33.336 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 17:15:33.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.337131. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 17:15:33.337 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.337 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 17:15:33.341 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", - "2024-12-14 17:15:33.342 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", - "2024-12-14 17:15:33.342 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 17:15:33.342 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.342 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", - "2024-12-14 17:15:33.344 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007560\n", - "2024-12-14 17:15:33.344 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T17:15:33.337131.json\n", - "2024-12-14 17:15:33.345 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 17:15:33.345 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.345648. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.345 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", - "2024-12-14 17:15:33.345 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.346 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", - "2024-12-14 17:15:33.350 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", - "2024-12-14 17:15:33.350 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", - "2024-12-14 17:15:33.350 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.350 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.350 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", - "2024-12-14 17:15:33.351 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", - "2024-12-14 17:15:33.351 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.351 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.352 | 
INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", - "2024-12-14 17:15:33.363 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.017417\n", - "2024-12-14 17:15:33.363 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T17:15:33.345648.json\n", - "2024-12-14 17:15:33.366 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 17:15:33.369 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.368406. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.371 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 17:15:33.371 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.373 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 17:15:33.386 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", - "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", - "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", - "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", - "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.387 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", - "2024-12-14 17:15:33.390 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.022108\n", - "2024-12-14 17:15:33.390 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T17:15:33.368406.json\n", - "2024-12-14 17:15:33.391 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 17:15:33.392 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.392174. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.392 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 17:15:33.392 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.392 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", - "2024-12-14 17:15:33.397 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", - "2024-12-14 17:15:33.398 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:33.398 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - 
"2024-12-14 17:15:33.398 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.398 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", - "2024-12-14 17:15:33.399 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:33.399 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.400 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.400 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", - "2024-12-14 17:15:33.400 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.401 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.401 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", - "2024-12-14 17:15:33.420 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.028113\n", - "2024-12-14 17:15:33.420 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T17:15:33.392174.json\n", - "2024-12-14 17:15:33.421 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 17:15:33.421 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.421362. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.421 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 17:15:33.421 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.422 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 17:15:33.426 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", - "2024-12-14 17:15:33.426 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", - "2024-12-14 17:15:33.426 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", - "2024-12-14 17:15:33.426 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.426 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.426 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", - "2024-12-14 17:15:33.429 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008179\n", - "2024-12-14 17:15:33.429 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T17:15:33.421362.json\n", - "2024-12-14 17:15:33.430 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 17:15:33.430 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.430538. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.430 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 17:15:33.430 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.431 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", - "2024-12-14 17:15:33.436 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", - "2024-12-14 17:15:33.436 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:33.436 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 17:15:33.436 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.437 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.437 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", - "2024-12-14 17:15:33.458 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.028130\n", - "2024-12-14 17:15:33.458 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T17:15:33.430538.json\n", - "2024-12-14 17:15:33.459 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 17:15:33.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.460250. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 17:15:33.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.460 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 17:15:33.465 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", - "2024-12-14 17:15:33.466 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:33.466 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.466 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.466 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", - "2024-12-14 17:15:33.466 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:33.467 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.467 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.467 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", - "2024-12-14 17:15:33.470 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010316\n", - "2024-12-14 17:15:33.470 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T17:15:33.460250.json\n", - "2024-12-14 17:15:33.471 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 17:15:33.471 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.471585. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.471 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 17:15:33.471 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.472 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 17:15:33.476 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", - "2024-12-14 17:15:33.476 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 17:15:33.476 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 17:15:33.476 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 17:15:33.477 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", - "2024-12-14 17:15:33.477 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 17:15:33.477 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", - "2024-12-14 17:15:33.477 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", - "2024-12-14 17:15:33.477 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", - "2024-12-14 17:15:33.490 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.019048\n", - "2024-12-14 17:15:33.490 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T17:15:33.471585.json\n", - "2024-12-14 17:15:33.491 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 17:15:33.491 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.491829. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.492 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", - "2024-12-14 17:15:33.492 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.492 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", - "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", - "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", - "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", - "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
admission_location\n", - "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_type\n", - "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", - "2024-12-14 17:15:33.498 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", - "2024-12-14 17:15:33.498 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.498 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.498 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", - "2024-12-14 17:15:33.503 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011900\n", - "2024-12-14 17:15:33.503 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T17:15:33.491829.json\n", - "2024-12-14 17:15:33.504 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 17:15:33.504 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.504429. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.504 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 17:15:33.504 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.504 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", - "2024-12-14 17:15:33.508 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", - "2024-12-14 17:15:33.508 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", - "2024-12-14 17:15:33.508 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", - "2024-12-14 17:15:33.508 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.509 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering 
out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.509 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", - "2024-12-14 17:15:33.510 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006418\n", - "2024-12-14 17:15:33.510 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T17:15:33.504429.json\n", - "2024-12-14 17:15:33.511 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 17:15:33.511 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.511839. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.512 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 17:15:33.512 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", - "2024-12-14 17:15:33.516 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", - "2024-12-14 17:15:33.516 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:33.516 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 17:15:33.516 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.517 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.517 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", - "2024-12-14 17:15:33.649 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.137863\n", - "2024-12-14 17:15:33.649 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T17:15:33.511839.json\n", - "2024-12-14 17:15:33.650 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 17:15:33.650 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.650624. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.650 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", - "2024-12-14 17:15:33.651 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.651 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", - "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", - "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", - "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", - "2024-12-14 17:15:33.655 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source 
column 'col(result_value)' for event column text_value is always interpreted as a column name. Removing col() function call and setting source column to result_value.\n", - "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", - "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.655 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", - "2024-12-14 17:15:33.658 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007397\n", - "2024-12-14 17:15:33.658 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T17:15:33.650624.json\n", - "2024-12-14 17:15:33.658 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:33.659 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.659087. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.659 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:33.659 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.659 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:33.663 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", - "2024-12-14 17:15:33.665 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", - "2024-12-14 17:15:33.665 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", - "2024-12-14 17:15:33.665 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.665 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.665 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:33.669 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010815\n", - "2024-12-14 17:15:33.669 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T17:15:33.659087.json\n", - "2024-12-14 17:15:33.670 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 17:15:33.670 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:33.670832. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:33.671 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", - "2024-12-14 17:15:33.671 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:33.671 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", - "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", - "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", - "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", - "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column 
charttime in possible formats %Y-%m-%d %H:%M:%S\n", - "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", - "2024-12-14 17:15:33.675 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", - "2024-12-14 17:15:33.688 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.018080\n", - "2024-12-14 17:15:33.689 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T17:15:33.670832.json\n", - "2024-12-14 17:15:33.689 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:823 - Subsharded into converted events.\n", + "2024-12-14 23:32:29.389 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.389 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 23:32:29.520 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.143349\n", + "2024-12-14 23:32:29.520 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T23:32:29.376691.json\n", + "2024-12-14 23:32:29.521 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 23:32:29.522 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.522514. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.523 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 23:32:29.523 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.524 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 23:32:29.533 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", + "2024-12-14 23:32:29.534 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", + "2024-12-14 23:32:29.534 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", + "2024-12-14 23:32:29.534 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", + 
"2024-12-14 23:32:29.535 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.535 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.535 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 23:32:29.544 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.022143\n", + "2024-12-14 23:32:29.544 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T23:32:29.522514.json\n", + "2024-12-14 23:32:29.546 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 23:32:29.547 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.547451. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.548 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 23:32:29.548 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.548 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 23:32:29.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 23:32:29.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 23:32:29.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 23:32:29.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.557 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 23:32:29.564 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016835\n", + "2024-12-14 23:32:29.564 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T23:32:29.547451.json\n", + "2024-12-14 23:32:29.565 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:29.566 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.566588. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.567 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:29.567 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.567 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:29.573 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", + "2024-12-14 23:32:29.574 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:29.574 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 23:32:29.574 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column 
charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.575 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.575 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:29.585 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.019274\n", + "2024-12-14 23:32:29.586 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T23:32:29.566588.json\n", + "2024-12-14 23:32:29.588 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 23:32:29.589 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.589050. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.589 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 23:32:29.589 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.590 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 23:32:29.598 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", + "2024-12-14 23:32:29.599 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 23:32:29.599 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 23:32:29.599 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 23:32:29.599 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.599 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 23:32:29.605 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016272\n", + "2024-12-14 23:32:29.605 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T23:32:29.589050.json\n", + "2024-12-14 23:32:29.606 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 23:32:29.607 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.607396. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.607 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 23:32:29.607 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.608 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", + "2024-12-14 23:32:29.614 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", + "2024-12-14 23:32:29.615 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.615 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + 
"2024-12-14 23:32:29.615 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", + "2024-12-14 23:32:29.615 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.616 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.616 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", + "2024-12-14 23:32:29.616 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_location\n", + "2024-12-14 23:32:29.616 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_type\n", + "2024-12-14 23:32:29.616 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.617 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.617 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", + "2024-12-14 23:32:29.618 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", + "2024-12-14 23:32:29.618 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.618 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.618 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 23:32:29.639 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.032434\n", + "2024-12-14 23:32:29.640 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T23:32:29.607396.json\n", + "2024-12-14 23:32:29.641 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 23:32:29.643 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.642754. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.643 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 23:32:29.643 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.644 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", + "2024-12-14 23:32:29.651 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", + "2024-12-14 23:32:29.652 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", + "2024-12-14 23:32:29.652 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.652 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.652 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", + "2024-12-14 23:32:29.653 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", + "2024-12-14 23:32:29.653 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.653 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.653 | INFO 
| MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 23:32:29.663 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.020755\n", + "2024-12-14 23:32:29.663 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T23:32:29.642754.json\n", + "2024-12-14 23:32:29.665 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 23:32:29.667 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.666585. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.667 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 23:32:29.667 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.668 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", + "2024-12-14 23:32:29.677 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", + "2024-12-14 23:32:29.678 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", + "2024-12-14 23:32:29.678 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 23:32:29.678 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source column 'col(result_value)' for event column text_value is always interpreted as a column name. 
Removing col() function call and setting source column to result_value.\n", + "2024-12-14 23:32:29.678 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", + "2024-12-14 23:32:29.678 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.678 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 23:32:29.686 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.019391\n", + "2024-12-14 23:32:29.686 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T23:32:29.666585.json\n", + "2024-12-14 23:32:29.689 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 23:32:29.690 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.690314. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.690 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 23:32:29.691 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.691 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 23:32:29.700 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", + "2024-12-14 23:32:29.701 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 23:32:29.701 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 23:32:29.701 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 23:32:29.703 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", + "2024-12-14 23:32:29.704 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 23:32:29.704 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 23:32:29.705 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 23:32:29.705 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 23:32:29.734 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.043994\n", + "2024-12-14 23:32:29.734 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T23:32:29.690314.json\n", + "2024-12-14 23:32:29.736 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 23:32:29.737 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.737107. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.737 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 23:32:29.738 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.738 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", + "2024-12-14 23:32:29.745 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 23:32:29.746 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:29.746 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 23:32:29.746 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.747 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.747 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 23:32:29.771 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.034586\n", + "2024-12-14 23:32:29.771 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T23:32:29.737107.json\n", + "2024-12-14 23:32:29.772 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 23:32:29.773 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.773231. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.773 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 23:32:29.773 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.774 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", + "2024-12-14 23:32:29.782 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", + "2024-12-14 23:32:29.782 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", + "2024-12-14 23:32:29.782 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 23:32:29.782 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible 
formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.783 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.783 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 23:32:29.798 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.025401\n", + "2024-12-14 23:32:29.798 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T23:32:29.773231.json\n", + "2024-12-14 23:32:29.800 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 23:32:29.801 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.800910. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.801 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 23:32:29.801 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.802 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 23:32:29.811 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", + "2024-12-14 23:32:29.813 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:29.813 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.814 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.814 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", + "2024-12-14 23:32:29.816 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:29.816 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.817 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.817 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 23:32:29.830 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.030036\n", + "2024-12-14 23:32:29.831 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T23:32:29.800910.json\n", + "2024-12-14 23:32:29.832 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 23:32:29.833 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.833439. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.834 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 23:32:29.834 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.834 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 23:32:29.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", + "2024-12-14 23:32:29.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", + "2024-12-14 23:32:29.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 23:32:29.844 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.844 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 23:32:29.849 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016438\n", + "2024-12-14 23:32:29.850 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T23:32:29.833439.json\n", + "2024-12-14 23:32:29.852 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 23:32:29.853 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.853587. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.854 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 23:32:29.854 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.855 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", + "2024-12-14 23:32:29.864 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 23:32:29.864 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 23:32:29.864 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 23:32:29.865 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 23:32:29.865 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth in possible 
formats %Y\n", + "2024-12-14 23:32:29.865 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.865 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", + "2024-12-14 23:32:29.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 23:32:29.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 23:32:29.866 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 23:32:29.877 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024153\n", + "2024-12-14 23:32:29.878 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T23:32:29.853587.json\n", + "2024-12-14 23:32:29.882 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 23:32:29.883 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.883102. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.883 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 23:32:29.883 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.884 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", + "2024-12-14 23:32:29.894 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", + "2024-12-14 23:32:29.895 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:29.895 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.896 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.896 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", + "2024-12-14 23:32:29.897 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:29.897 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats 
%Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.898 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.898 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", + "2024-12-14 23:32:29.898 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.899 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.899 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 23:32:29.935 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.052428\n", + "2024-12-14 23:32:29.935 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T23:32:29.883102.json\n", + "2024-12-14 23:32:29.936 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 23:32:29.938 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.937964. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.938 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 23:32:29.938 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.939 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", + "2024-12-14 23:32:29.945 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", + "2024-12-14 23:32:29.945 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", + "2024-12-14 23:32:29.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", + "2024-12-14 23:32:29.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.946 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 23:32:29.952 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - 
Succeeded in 0:00:00.014822\n", + "2024-12-14 23:32:29.952 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T23:32:29.937964.json\n", + "2024-12-14 23:32:29.955 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 23:32:29.956 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:29.956236. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:29.956 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 23:32:29.957 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:29.958 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", + "2024-12-14 23:32:29.964 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", + "2024-12-14 23:32:29.965 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:29.965 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 23:32:29.965 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:29.965 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:29.966 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 23:32:30.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.157921\n", + "2024-12-14 23:32:30.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T23:32:29.956236.json\n", + "2024-12-14 23:32:30.115 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 23:32:30.115 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.115792. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.116 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 23:32:30.116 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 23:32:30.122 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", + "2024-12-14 23:32:30.123 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", + "2024-12-14 23:32:30.123 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", + "2024-12-14 23:32:30.123 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", + "2024-12-14 
23:32:30.123 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.124 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.124 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 23:32:30.132 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016833\n", + "2024-12-14 23:32:30.132 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T23:32:30.115792.json\n", + "2024-12-14 23:32:30.134 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 23:32:30.136 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.135840. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.136 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 23:32:30.137 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.137 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 23:32:30.146 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 23:32:30.147 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 23:32:30.147 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 23:32:30.147 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.147 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.147 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 23:32:30.155 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.019124\n", + "2024-12-14 23:32:30.155 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T23:32:30.135840.json\n", + "2024-12-14 23:32:30.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:30.159 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.159151. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.160 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:30.160 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.161 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:30.169 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", + "2024-12-14 23:32:30.170 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:30.170 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 23:32:30.170 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime 
in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.171 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.171 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:30.184 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.025153\n", + "2024-12-14 23:32:30.184 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T23:32:30.159151.json\n", + "2024-12-14 23:32:30.186 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 23:32:30.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.187163. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 23:32:30.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.188 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 23:32:30.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", + "2024-12-14 23:32:30.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 23:32:30.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 23:32:30.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 23:32:30.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.197 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 23:32:30.203 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016239\n", + "2024-12-14 23:32:30.203 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T23:32:30.187163.json\n", + "2024-12-14 23:32:30.205 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 23:32:30.206 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.206162. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.206 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 23:32:30.206 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.207 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", + "2024-12-14 23:32:30.217 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", + "2024-12-14 23:32:30.218 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.218 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + 
"2024-12-14 23:32:30.218 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", + "2024-12-14 23:32:30.218 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.219 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", + "2024-12-14 23:32:30.220 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_location\n", + "2024-12-14 23:32:30.220 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_type\n", + "2024-12-14 23:32:30.220 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.221 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.221 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", + "2024-12-14 23:32:30.222 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", + "2024-12-14 23:32:30.222 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.223 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.223 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 23:32:30.242 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.036702\n", + "2024-12-14 23:32:30.243 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T23:32:30.206162.json\n", + "2024-12-14 23:32:30.244 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 23:32:30.245 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.245389. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.245 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 23:32:30.246 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.246 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", + "2024-12-14 23:32:30.253 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", + "2024-12-14 23:32:30.253 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", + "2024-12-14 23:32:30.253 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.254 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.254 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", + "2024-12-14 23:32:30.254 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", + "2024-12-14 23:32:30.254 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.255 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.255 | INFO 
| MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 23:32:30.264 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.019570\n", + "2024-12-14 23:32:30.265 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T23:32:30.245389.json\n", + "2024-12-14 23:32:30.266 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 23:32:30.267 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.267396. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.267 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 23:32:30.268 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.268 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", + "2024-12-14 23:32:30.276 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", + "2024-12-14 23:32:30.276 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", + "2024-12-14 23:32:30.277 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 23:32:30.277 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source column 'col(result_value)' for event column text_value is always interpreted as a column name. 
Removing col() function call and setting source column to result_value.\n", + "2024-12-14 23:32:30.277 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", + "2024-12-14 23:32:30.277 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.277 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 23:32:30.282 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.015276\n", + "2024-12-14 23:32:30.282 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T23:32:30.267396.json\n", + "2024-12-14 23:32:30.283 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 23:32:30.284 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.284365. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.284 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 23:32:30.284 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 23:32:30.291 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", + "2024-12-14 23:32:30.291 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 23:32:30.291 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 23:32:30.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 23:32:30.292 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", + "2024-12-14 23:32:30.293 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 23:32:30.293 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 23:32:30.293 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 23:32:30.293 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 23:32:30.326 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.042024\n", + "2024-12-14 23:32:30.326 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T23:32:30.284365.json\n", + "2024-12-14 23:32:30.328 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 23:32:30.329 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.329061. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.329 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 23:32:30.329 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.330 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", + "2024-12-14 23:32:30.337 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 23:32:30.338 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:30.338 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 23:32:30.338 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.339 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.339 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 23:32:30.376 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.047272\n", + "2024-12-14 23:32:30.376 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T23:32:30.329061.json\n", + "2024-12-14 23:32:30.377 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 23:32:30.378 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.377864. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.378 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 23:32:30.378 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.379 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", + "2024-12-14 23:32:30.384 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", + "2024-12-14 23:32:30.385 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", + "2024-12-14 23:32:30.385 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 23:32:30.385 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats 
%Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.386 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.386 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 23:32:30.410 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.032700\n", + "2024-12-14 23:32:30.410 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T23:32:30.377864.json\n", + "2024-12-14 23:32:30.413 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 23:32:30.414 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.413922. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.414 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 23:32:30.414 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.415 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 23:32:30.422 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", + "2024-12-14 23:32:30.423 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:30.423 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.423 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.423 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", + "2024-12-14 23:32:30.424 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:30.424 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.425 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.425 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 23:32:30.439 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.025069\n", + "2024-12-14 23:32:30.439 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T23:32:30.413922.json\n", + "2024-12-14 23:32:30.440 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 23:32:30.441 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.441348. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.442 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 23:32:30.442 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.442 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 23:32:30.450 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", + "2024-12-14 23:32:30.451 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", + "2024-12-14 23:32:30.451 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 23:32:30.451 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.451 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 23:32:30.458 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016908\n", + "2024-12-14 23:32:30.458 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T23:32:30.441348.json\n", + "2024-12-14 23:32:30.460 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 23:32:30.462 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.462053. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.463 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 23:32:30.463 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.464 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", + "2024-12-14 23:32:30.473 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 23:32:30.473 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 23:32:30.474 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 23:32:30.474 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 23:32:30.474 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth in possible 
formats %Y\n", + "2024-12-14 23:32:30.474 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.474 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", + "2024-12-14 23:32:30.475 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 23:32:30.475 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 23:32:30.475 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 23:32:30.486 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024266\n", + "2024-12-14 23:32:30.486 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T23:32:30.462053.json\n", + "2024-12-14 23:32:30.489 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 23:32:30.490 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.489817. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.490 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 23:32:30.490 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.490 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", + "2024-12-14 23:32:30.498 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", + "2024-12-14 23:32:30.499 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:30.499 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.500 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.500 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", + "2024-12-14 23:32:30.501 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:30.501 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d 
%H:%M:%S\n", + "2024-12-14 23:32:30.502 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.502 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", + "2024-12-14 23:32:30.502 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.503 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.503 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 23:32:30.536 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.046372\n", + "2024-12-14 23:32:30.536 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T23:32:30.489817.json\n", + "2024-12-14 23:32:30.537 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet to events and saving to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 23:32:30.538 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.537896. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.538 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 23:32:30.538 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.538 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/transfers/[0-1190).parquet\n", + "2024-12-14 23:32:30.544 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting transfer\n", + "2024-12-14 23:32:30.545 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column careunit\n", + "2024-12-14 23:32:30.545 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column eventtype\n", + "2024-12-14 23:32:30.545 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.545 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.545 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", + "2024-12-14 23:32:30.550 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded 
in 0:00:00.013045\n", + "2024-12-14 23:32:30.551 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/.[0-1190).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/.[0-1190).parquet_cache/locks/2024-12-14T23:32:30.537896.json\n", + "2024-12-14 23:32:30.552 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 23:32:30.553 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.552843. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.553 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 23:32:30.553 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.553 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/chartevents/[0-668862).parquet\n", + "2024-12-14 23:32:30.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting event\n", + "2024-12-14 23:32:30.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:30.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + 
"2024-12-14 23:32:30.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.561 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.561 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", + "2024-12-14 23:32:30.834 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.281841\n", + "2024-12-14 23:32:30.834 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/.[0-668862).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/.[0-668862).parquet_cache/locks/2024-12-14T23:32:30.552843.json\n", + "2024-12-14 23:32:30.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 23:32:30.836 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.836291. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.836 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 23:32:30.836 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.837 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 23:32:30.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drg\n", + "2024-12-14 23:32:30.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_code\n", + "2024-12-14 23:32:30.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drg_type\n", + "2024-12-14 23:32:30.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column description\n", + "2024-12-14 23:32:30.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.844 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.844 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", + "2024-12-14 23:32:30.853 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.017661\n", + "2024-12-14 23:32:30.854 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - 
Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/.[0-454).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/.[0-454).parquet_cache/locks/2024-12-14T23:32:30.836291.json\n", + "2024-12-14 23:32:30.855 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 23:32:30.857 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.856806. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.857 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 23:32:30.857 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.858 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 23:32:30.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 23:32:30.867 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 23:32:30.867 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 23:32:30.867 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - 
Adding time column hadm_discharge_time in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.868 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hadm_discharge_time\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.868 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", + "2024-12-14 23:32:30.877 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.020737\n", + "2024-12-14 23:32:30.877 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/.[0-4506).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/.[0-4506).parquet_cache/locks/2024-12-14T23:32:30.856806.json\n", + "2024-12-14 23:32:30.880 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:30.882 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.882287. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.883 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:30.883 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.884 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:30.891 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting output\n", + "2024-12-14 23:32:30.892 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:30.893 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 23:32:30.893 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.893 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.893 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:30.909 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.027061\n", + "2024-12-14 23:32:30.909 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/.[0-9362).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/.[0-9362).parquet_cache/locks/2024-12-14T23:32:30.882287.json\n", + "2024-12-14 23:32:30.911 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 23:32:30.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.912113. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 23:32:30.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.913 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 23:32:30.921 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting procedure\n", + "2024-12-14 23:32:30.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_code\n", + "2024-12-14 23:32:30.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd_version\n", + "2024-12-14 23:32:30.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column 
chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 23:32:30.923 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.923 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", + "2024-12-14 23:32:30.928 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016660\n", + "2024-12-14 23:32:30.928 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/.[0-722).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/.[0-722).parquet_cache/locks/2024-12-14T23:32:30.912113.json\n", + "2024-12-14 23:32:30.930 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 23:32:30.931 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.931376. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.931 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/admissions/[0-275).parquet\n", + "2024-12-14 23:32:30.932 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.932 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/admissions/[0-275).parquet\n", + "2024-12-14 23:32:30.942 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_registration\n", + "2024-12-14 23:32:30.942 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edregtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.943 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edregtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.943 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ed_out\n", + "2024-12-14 23:32:30.943 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column edouttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.943 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"edouttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.943 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission\n", + "2024-12-14 23:32:30.944 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
admission_location\n", + "2024-12-14 23:32:30.945 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admission_type\n", + "2024-12-14 23:32:30.945 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column admittime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admittime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting discharge\n", + "2024-12-14 23:32:30.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column discharge_location\n", + "2024-12-14 23:32:30.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dischtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.947 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dischtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.947 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", + "2024-12-14 23:32:30.966 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.034843\n", + "2024-12-14 23:32:30.966 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/.[0-275).parquet_cache, but clearing lock at 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/.[0-275).parquet_cache/locks/2024-12-14T23:32:30.931376.json\n", + "2024-12-14 23:32:30.968 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 23:32:30.969 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.968811. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.969 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/icustays/[0-140).parquet\n", + "2024-12-14 23:32:30.969 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.969 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/icustays/[0-140).parquet\n", + "2024-12-14 23:32:30.976 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_admission\n", + "2024-12-14 23:32:30.976 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column first_careunit\n", + "2024-12-14 23:32:30.976 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column intime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.977 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"intime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.977 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting icu_discharge\n", + "2024-12-14 23:32:30.977 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column last_careunit\n", + "2024-12-14 23:32:30.977 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column outtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:30.978 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"outtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.978 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", + "2024-12-14 23:32:30.986 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.017256\n", + "2024-12-14 23:32:30.986 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/.[0-140).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/.[0-140).parquet_cache/locks/2024-12-14T23:32:30.968811.json\n", + "2024-12-14 23:32:30.987 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 23:32:30.988 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:30.988117. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:30.988 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/omr/[0-2964).parquet\n", + "2024-12-14 23:32:30.988 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:30.989 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/omr/[0-2964).parquet\n", + "2024-12-14 23:32:30.995 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting omr\n", + "2024-12-14 23:32:30.996 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column result_name\n", + "2024-12-14 23:32:30.996 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 23:32:30.996 | WARNING | MEDS_transforms.extract.convert_to_sharded_events:extract_event:507 - Source column 'col(result_value)' for event column text_value is always interpreted as a column name. 
Removing col() function call and setting source column to result_value.\n", + "2024-12-14 23:32:30.996 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:558 - Filtering out rows with null codes via col(\"result_name\").is_not_null()\n", + "2024-12-14 23:32:30.996 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:30.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", + "2024-12-14 23:32:31.001 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013443\n", + "2024-12-14 23:32:31.001 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/.[0-2964).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/.[0-2964).parquet_cache/locks/2024-12-14T23:32:30.988117.json\n", + "2024-12-14 23:32:31.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 23:32:31.003 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:31.003083. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:31.003 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 23:32:31.003 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:31.003 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 23:32:31.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_start\n", + "2024-12-14 23:32:31.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 23:32:31.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 23:32:31.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce([col(\"starttime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 23:32:31.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication_stop\n", + "2024-12-14 23:32:31.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 23:32:31.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column stoptime in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 23:32:31.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"stoptime\").str.strptime([String(raise)]).coalesce([col(\"stoptime\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 23:32:31.011 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", + "2024-12-14 23:32:31.050 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.047293\n", + "2024-12-14 23:32:31.050 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/.[0-15306).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/.[0-15306).parquet_cache/locks/2024-12-14T23:32:31.003083.json\n", + "2024-12-14 23:32:31.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 23:32:31.052 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:31.052161. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:31.052 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 23:32:31.052 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:31.053 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/labevents/[0-107727).parquet\n", + "2024-12-14 23:32:31.058 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 23:32:31.059 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:31.059 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column valueuom\n", + "2024-12-14 23:32:31.059 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:31.060 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:31.060 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", + "2024-12-14 23:32:31.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.059419\n", + "2024-12-14 23:32:31.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/.[0-107727).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/.[0-107727).parquet_cache/locks/2024-12-14T23:32:31.052161.json\n", + "2024-12-14 23:32:31.112 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 23:32:31.113 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:31.113054. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:31.113 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/emar/[0-35835).parquet\n", + "2024-12-14 23:32:31.113 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:31.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/emar/[0-35835).parquet\n", + "2024-12-14 23:32:31.119 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting medication\n", + "2024-12-14 23:32:31.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column event_txt\n", + "2024-12-14 23:32:31.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column medication\n", + "2024-12-14 23:32:31.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column charttime in possible formats 
%Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:31.121 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"charttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:31.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", + "2024-12-14 23:32:31.162 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.049377\n", + "2024-12-14 23:32:31.162 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/.[0-35835).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/.[0-35835).parquet_cache/locks/2024-12-14T23:32:31.113054.json\n", + "2024-12-14 23:32:31.164 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 23:32:31.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:31.165140. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:31.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 23:32:31.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:31.166 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 23:32:31.172 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting start\n", + "2024-12-14 23:32:31.174 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:31.174 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:31.174 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:31.174 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting end\n", + "2024-12-14 23:32:31.175 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:31.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:31.176 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:31.176 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", + "2024-12-14 23:32:31.191 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.025862\n", + "2024-12-14 23:32:31.191 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/.[0-1468).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/.[0-1468).parquet_cache/locks/2024-12-14T23:32:31.165140.json\n", + "2024-12-14 23:32:31.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 23:32:31.193 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:31.193243. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:31.193 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 23:32:31.194 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:31.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 23:32:31.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hcpcs\n", + "2024-12-14 23:32:31.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column short_description\n", + "2024-12-14 23:32:31.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column chartdate in possible formats %Y-%m-%d\n", + "2024-12-14 23:32:31.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"chartdate\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:31.204 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", + "2024-12-14 23:32:31.210 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.017202\n", + "2024-12-14 23:32:31.210 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/.[0-61).parquet_cache, but clearing lock at 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/.[0-61).parquet_cache/locks/2024-12-14T23:32:31.193243.json\n", + "2024-12-14 23:32:31.213 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 23:32:31.214 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:31.214208. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:31.215 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/hosp/patients/[0-100).parquet\n", + "2024-12-14 23:32:31.215 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:31.216 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for hosp/patients/[0-100).parquet\n", + "2024-12-14 23:32:31.225 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 23:32:31.226 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 23:32:31.226 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 23:32:31.226 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 23:32:31.226 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column year_of_birth in possible formats 
%Y\n", + "2024-12-14 23:32:31.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"year_of_birth\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:31.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting death\n", + "2024-12-14 23:32:31.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column dod in possible formats %Y-%m-%d %H:%M:%S, %Y-%m-%d\n", + "2024-12-14 23:32:31.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dod\").str.strptime([String(raise)]).coalesce([col(\"dod\").str.strptime([String(raise)])]).is_not_null()\n", + "2024-12-14 23:32:31.227 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", + "2024-12-14 23:32:31.238 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024432\n", + "2024-12-14 23:32:31.238 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/.[0-100).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/.[0-100).parquet_cache/locks/2024-12-14T23:32:31.214208.json\n", + "2024-12-14 23:32:31.243 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet to events and saving to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 23:32:31.244 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:31.244014. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:31.244 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/shard_events/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 23:32:31.244 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:31.245 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for icu/inputevents/[0-20404).parquet\n", + "2024-12-14 23:32:31.255 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_start\n", + "2024-12-14 23:32:31.256 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:31.256 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:31.256 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:31.257 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting input_end\n", + "2024-12-14 23:32:31.257 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column itemid\n", + "2024-12-14 23:32:31.257 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column endtime in possible formats %Y-%m-%d 
%H:%M:%S\n", + "2024-12-14 23:32:31.258 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"endtime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:31.258 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting subject_weight\n", + "2024-12-14 23:32:31.259 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:484 - Adding time column starttime in possible formats %Y-%m-%d %H:%M:%S\n", + "2024-12-14 23:32:31.259 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"starttime\").str.strptime([String(raise)]).coalesce().is_not_null()\n", + "2024-12-14 23:32:31.259 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", + "2024-12-14 23:32:31.310 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.066946\n", + "2024-12-14 23:32:31.311 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/.[0-20404).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/.[0-20404).parquet_cache/locks/2024-12-14T23:32:31.244014.json\n", + "2024-12-14 23:32:31.311 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:823 - Subsharded into converted events.\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:33.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: 
merge_to_MEDS_cohort\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:33.769\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-merge_to_MEDS_cohort --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=merge_to_MEDS_cohort\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:35.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 23:32:31.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: merge_to_MEDS_cohort\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:31.502\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-merge_to_MEDS_cohort --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=merge_to_MEDS_cohort\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:33.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:35.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 17:15:34.367 | INFO | MEDS_transforms.utils:stage_init:73 - Running merge_to_MEDS_cohort with the following configuration:\n", + "\u001b[32m2024-12-14 23:32:33.647\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 23:32:32.215 | INFO | MEDS_transforms.utils:stage_init:73 - Running merge_to_MEDS_cohort with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -2574,100 +2574,100 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 17:15:34.382 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 23:32:32.237 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "unique_by: '*'\n", "additional_sort_by: null\n", "is_metadata: false\n", - "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events\n", - "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", "reducer_output_dir: null\n", "\n", "Paths: (checkbox indicates if it exists)\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events\n", - " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", - " - metadata_input_dir: ❌ 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", - "2024-12-14 17:15:34.397 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator_by_shard_map:687 - Mapping computation over a maximum of 3 shards\n", - "2024-12-14 17:15:34.404 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", - "2024-12-14 17:15:34.405 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:34.405491. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:34.407 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0\n", - "2024-12-14 17:15:34.408 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", - " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:34.417 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:34.417 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", - "2024-12-14 
17:15:34.536 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.130660\n", - "2024-12-14 17:15:34.536 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache/locks/2024-12-14T17:15:34.405491.json\n", - "2024-12-14 17:15:34.536 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 17:15:34.537 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:34.537340. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:34.537 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0\n", - "2024-12-14 17:15:34.538 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", - " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:34.540 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:34.540 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 17:15:34.716 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.178790\n", - "2024-12-14 17:15:34.716 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache/locks/2024-12-14T17:15:34.537340.json\n", - "2024-12-14 17:15:34.716 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 17:15:34.717 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:34.717118. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:34.717 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0\n", - "2024-12-14 17:15:34.718 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", - " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", - "2024-12-14 17:15:34.720 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:34.720 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 17:15:35.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.364595\n", - "2024-12-14 17:15:35.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache/locks/2024-12-14T17:15:34.717118.json\n", - "2024-12-14 17:15:35.082 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.699477\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events\n", + " - output_dir: ✅ 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", + " - metadata_input_dir: ❌ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", + "2024-12-14 23:32:32.259 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator_by_shard_map:687 - Mapping computation over a maximum of 3 shards\n", + "2024-12-14 23:32:32.270 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0 into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 23:32:32.271 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:32.271660. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:32.273 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0\n", + "2024-12-14 23:32:32.275 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/admissions/[0-275).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/diagnoses_icd/[0-4506).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/drgcodes/[0-454).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/emar/[0-35835).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/hcpcsevents/[0-61).parquet\n", + " - 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/labevents/[0-107727).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/omr/[0-2964).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/patients/[0-100).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/pharmacy/[0-15306).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/procedures_icd/[0-722).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/hosp/transfers/[0-1190).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/icustays/[0-140).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/chartevents/[0-668862).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/procedureevents/[0-1468).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/inputevents/[0-20404).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/held_out/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:32.279 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:32.280 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 23:32:32.493 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.221412\n", + "2024-12-14 23:32:32.493 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache/locks/2024-12-14T23:32:32.271660.json\n", + "2024-12-14 23:32:32.493 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0 into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 23:32:32.494 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:32.494340. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:32.494 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0\n", + "2024-12-14 23:32:32.496 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/admissions/[0-275).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/diagnoses_icd/[0-4506).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/drgcodes/[0-454).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/emar/[0-35835).parquet\n", + " - 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/hcpcsevents/[0-61).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/labevents/[0-107727).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/omr/[0-2964).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/patients/[0-100).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/pharmacy/[0-15306).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/procedures_icd/[0-722).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/hosp/transfers/[0-1190).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/icustays/[0-140).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/chartevents/[0-668862).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/procedureevents/[0-1468).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/inputevents/[0-20404).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/tuning/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:32.499 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:32.499 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 23:32:32.859 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.365329\n", + "2024-12-14 23:32:32.859 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache/locks/2024-12-14T23:32:32.494340.json\n", + "2024-12-14 23:32:32.860 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0 into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 23:32:32.861 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:32.861170. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:32.861 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0\n", + "2024-12-14 23:32:32.863 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 16 files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/admissions/[0-275).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/diagnoses_icd/[0-4506).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/drgcodes/[0-454).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/emar/[0-35835).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/hcpcsevents/[0-61).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/labevents/[0-107727).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/omr/[0-2964).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/patients/[0-100).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/pharmacy/[0-15306).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/procedures_icd/[0-722).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/hosp/transfers/[0-1190).parquet\n", + " - 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/icustays/[0-140).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/chartevents/[0-668862).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/procedureevents/[0-1468).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/inputevents/[0-20404).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/convert_to_sharded_events/train/0/icu/outputevents/[0-9362).parquet\n", + "2024-12-14 23:32:32.866 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:32.866 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 23:32:33.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.599240\n", + "2024-12-14 23:32:33.460 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache/locks/2024-12-14T23:32:32.861170.json\n", + "2024-12-14 23:32:33.460 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:01.223172\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:35.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: extract_code_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:35.164\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-extract_code_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=extract_code_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:36.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 23:32:33.648\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: extract_code_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:33.665\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-extract_code_metadata --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=extract_code_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:35.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:36.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 17:15:35.785 | INFO | MEDS_transforms.utils:stage_init:73 - Running extract_code_metadata with the following configuration:\n", + "\u001b[32m2024-12-14 23:32:35.086\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand 
error:\n", + "2024-12-14 23:32:34.356 | INFO | MEDS_transforms.utils:stage_init:73 - Running extract_code_metadata with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -2736,23 +2736,23 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 17:15:35.802 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 23:32:34.380 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "is_metadata: true\n", "description_separator: '\n", "\n", " '\n", - "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", - "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", - "reducer_output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", + "reducer_output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", "train_only: true\n", "\n", "Paths: (checkbox indicates if it exists)\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", - " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", - " - 
metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", - "2024-12-14 17:15:35.803 | INFO | MEDS_transforms.extract.extract_code_metadata:main:359 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml\n", - "2024-12-14 17:15:35.835 | INFO | MEDS_transforms.extract.extract_code_metadata:main:361 - Event conversion config:\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", + " - metadata_input_dir: ❌ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/split_and_shard_subjects\n", + "2024-12-14 23:32:34.381 | INFO | MEDS_transforms.extract.extract_code_metadata:main:359 - Reading event conversion config from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs/event_configs.yaml\n", + "2024-12-14 23:32:34.422 | INFO | MEDS_transforms.extract.extract_code_metadata:main:361 - Event conversion config:\n", "subject_id_col: subject_id\n", "hosp/admissions:\n", " ed_registration:\n", @@ -3075,107 +3075,107 @@ " valueuom: unitname\n", " parent_codes: '{omop_vocabulary_id}/{omop_concept_code}'\n", "\n", - "2024-12-14 17:15:35.887 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet\n", - "2024-12-14 17:15:35.887 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_procedures.parquet\n", - "2024-12-14 17:15:35.889 | INFO 
| MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:35.888743. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:35.890 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet\n", - "2024-12-14 17:15:35.890 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/extract_code_metadata.py:184: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + "2024-12-14 23:32:34.503 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv\n", + "2024-12-14 23:32:34.503 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_hcpcs.parquet\n", + "2024-12-14 23:32:34.504 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:34.504433. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:34.506 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv\n", + "2024-12-14 23:32:34.506 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/extract/extract_code_metadata.py:184: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " if metadata_df.schema[mandatory_col] is not mandatory_type:\n", - "2024-12-14 17:15:35.898 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 17:15:35.899 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", - "2024-12-14 17:15:35.899 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_procedures.parquet\n", - "2024-12-14 17:15:35.946 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.057475\n", - "2024-12-14 17:15:35.946 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_procedures.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_procedures.parquet_cache/locks/2024-12-14T17:15:35.888743.json\n", - "2024-12-14 17:15:35.947 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv\n", - "2024-12-14 17:15:35.947 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/inputevents_to_rxnorm.parquet\n", - "2024-12-14 17:15:35.947 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:35.947489. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:35.947 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv\n", - "2024-12-14 17:15:35.948 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:35.951 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 17:15:35.951 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 17:15:35.953 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 17:15:35.953 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", - "2024-12-14 17:15:35.953 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/inputevents_to_rxnorm.parquet\n", - "2024-12-14 17:15:35.970 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.022885\n", - "2024-12-14 17:15:35.970 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.inputevents_to_rxnorm.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.inputevents_to_rxnorm.parquet_cache/locks/2024-12-14T17:15:35.947489.json\n", - "2024-12-14 17:15:35.971 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv\n", - "2024-12-14 17:15:35.971 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_datetimeevents.parquet\n", - "2024-12-14 17:15:35.971 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:35.971762. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:35.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv\n", - "2024-12-14 17:15:35.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:35.973 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 17:15:35.974 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 17:15:35.975 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 17:15:35.975 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", - "2024-12-14 17:15:35.975 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_datetimeevents.parquet\n", - "2024-12-14 17:15:35.982 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010768\n", - "2024-12-14 17:15:35.982 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_datetimeevents.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_datetimeevents.parquet_cache/locks/2024-12-14T17:15:35.971762.json\n", - "2024-12-14 17:15:35.983 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/raw_data/mimic-iv-demo/2.2/hosp/d_hcpcs.csv\n", - "2024-12-14 17:15:35.983 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_hcpcs.parquet\n", - "2024-12-14 17:15:35.983 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:35.983359. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:35.983 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_hcpcs.csv\n", - "2024-12-14 17:15:35.983 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:35.985 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 17:15:35.985 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_hcpcs.parquet\n", - "2024-12-14 17:15:36.015 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.032007\n", - "2024-12-14 17:15:36.015 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_hcpcs.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_hcpcs.parquet_cache/locks/2024-12-14T17:15:35.983359.json\n", - "2024-12-14 17:15:36.016 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet\n", - "2024-12-14 17:15:36.016 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_diagnoses.parquet\n", - "2024-12-14 17:15:36.016 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 
17:15:36.016392. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:36.016 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet\n", - "2024-12-14 17:15:36.016 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:36.019 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 17:15:36.019 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 17:15:36.019 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_diagnoses.parquet\n", - "2024-12-14 17:15:36.052 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.035722\n", - "2024-12-14 17:15:36.052 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_diagnoses.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_diagnoses.parquet_cache/locks/2024-12-14T17:15:36.016392.json\n", - "2024-12-14 17:15:36.053 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv\n", - "2024-12-14 17:15:36.053 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/d_labitems_to_loinc.parquet\n", - "2024-12-14 17:15:36.053 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:36.053647. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:36.053 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv\n", - "2024-12-14 17:15:36.054 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:36.056 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 17:15:36.057 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 17:15:36.058 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 17:15:36.058 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", - "2024-12-14 17:15:36.058 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/d_labitems_to_loinc.parquet\n", - "2024-12-14 17:15:36.077 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.023464\n", - "2024-12-14 17:15:36.077 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.d_labitems_to_loinc.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.d_labitems_to_loinc.parquet_cache/locks/2024-12-14T17:15:36.053647.json\n", - "2024-12-14 17:15:36.077 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv\n", - "2024-12-14 17:15:36.077 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/outputevents_to_loinc.parquet\n", - "2024-12-14 17:15:36.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:36.078096. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:36.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv\n", - "2024-12-14 17:15:36.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:36.080 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 17:15:36.080 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 17:15:36.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/outputevents_to_loinc.parquet\n", - "2024-12-14 17:15:36.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005380\n", - "2024-12-14 17:15:36.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.outputevents_to_loinc.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.outputevents_to_loinc.parquet_cache/locks/2024-12-14T17:15:36.078096.json\n", - "2024-12-14 17:15:36.084 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv\n", - "2024-12-14 17:15:36.084 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv and saving to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_itemid.parquet\n", - "2024-12-14 17:15:36.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:36.084421. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:36.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv\n", - "2024-12-14 17:15:36.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:36.086 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 17:15:36.086 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", - "2024-12-14 17:15:36.088 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", - "2024-12-14 17:15:36.088 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", - "2024-12-14 17:15:36.088 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_itemid.parquet\n", - "2024-12-14 17:15:36.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011606\n", - "2024-12-14 17:15:36.096 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_itemid.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_itemid.parquet_cache/locks/2024-12-14T17:15:36.084421.json\n", - "2024-12-14 17:15:36.096 | INFO | MEDS_transforms.extract.extract_code_metadata:main:398 - Extracted metadata for all events. Merging.\n", - "2024-12-14 17:15:36.096 | INFO | MEDS_transforms.extract.extract_code_metadata:main:404 - Starting reduction process\n", - "2024-12-14 17:15:36.096 | INFO | MEDS_transforms.extract.extract_code_metadata:main:412 - All map shards complete! Starting code metadata reduction computation.\n", - "2024-12-14 17:15:36.113 | INFO | MEDS_transforms.extract.extract_code_metadata:main:424 - Collected metadata for 2661 unique codes among 42898 total observations.\n", - "2024-12-14 17:15:36.151 | INFO | MEDS_transforms.extract.extract_code_metadata:main:449 - Finished reduction in 0:00:00.054648\n", + "2024-12-14 23:32:34.508 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. 
Casting.\n", + "2024-12-14 23:32:34.509 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_hcpcs.parquet\n", + "2024-12-14 23:32:34.587 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.083322\n", + "2024-12-14 23:32:34.587 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_hcpcs.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_hcpcs.parquet_cache/locks/2024-12-14T23:32:34.504433.json\n", + "2024-12-14 23:32:34.589 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv\n", + "2024-12-14 23:32:34.589 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/inputevents_to_rxnorm.parquet\n", + "2024-12-14 23:32:34.589 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:34.589619. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:34.590 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/inputevents_to_rxnorm.csv\n", + "2024-12-14 23:32:34.590 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:34.592 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. 
Casting.\n", + "2024-12-14 23:32:34.592 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 23:32:34.594 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 23:32:34.594 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 23:32:34.594 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/inputevents_to_rxnorm.parquet\n", + "2024-12-14 23:32:34.614 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.025106\n", + "2024-12-14 23:32:34.614 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.inputevents_to_rxnorm.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.inputevents_to_rxnorm.parquet_cache/locks/2024-12-14T23:32:34.589619.json\n", + "2024-12-14 23:32:34.617 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv\n", + "2024-12-14 23:32:34.618 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_itemid.parquet\n", + "2024-12-14 23:32:34.618 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:34.618600. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:34.619 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_itemid.csv\n", + "2024-12-14 23:32:34.619 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:34.621 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 23:32:34.621 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 23:32:34.622 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 23:32:34.623 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", + "2024-12-14 23:32:34.623 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_itemid.parquet\n", + "2024-12-14 23:32:34.633 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.014555\n", + "2024-12-14 23:32:34.633 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_itemid.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_itemid.parquet_cache/locks/2024-12-14T23:32:34.618600.json\n", + "2024-12-14 23:32:34.635 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv\n", + "2024-12-14 23:32:34.635 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/d_labitems_to_loinc.parquet\n", + "2024-12-14 23:32:34.635 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:34.635669. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:34.636 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/d_labitems_to_loinc.csv\n", + "2024-12-14 23:32:34.636 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:34.639 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. 
Casting.\n", + "2024-12-14 23:32:34.639 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 23:32:34.641 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 23:32:34.641 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 23:32:34.641 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/d_labitems_to_loinc.parquet\n", + "2024-12-14 23:32:34.665 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.029369\n", + "2024-12-14 23:32:34.665 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.d_labitems_to_loinc.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.d_labitems_to_loinc.parquet_cache/locks/2024-12-14T23:32:34.635669.json\n", + "2024-12-14 23:32:34.666 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv\n", + "2024-12-14 23:32:34.666 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/outputevents_to_loinc.parquet\n", + "2024-12-14 23:32:34.667 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 
23:32:34.666800. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:34.667 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/outputevents_to_loinc.csv\n", + "2024-12-14 23:32:34.667 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:34.669 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 23:32:34.669 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 23:32:34.669 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/outputevents_to_loinc.parquet\n", + "2024-12-14 23:32:34.675 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008507\n", + "2024-12-14 23:32:34.675 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.outputevents_to_loinc.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.outputevents_to_loinc.parquet_cache/locks/2024-12-14T23:32:34.666800.json\n", + "2024-12-14 23:32:34.676 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet\n", + "2024-12-14 23:32:34.676 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet and saving to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_procedures.parquet\n", + "2024-12-14 23:32:34.677 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:34.677175. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:34.677 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_procedures.parquet\n", + "2024-12-14 23:32:34.677 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:34.681 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 23:32:34.681 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 23:32:34.681 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_procedures.parquet\n", + "2024-12-14 23:32:34.736 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.059329\n", + "2024-12-14 23:32:34.736 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_procedures.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_procedures.parquet_cache/locks/2024-12-14T23:32:34.677175.json\n", + "2024-12-14 23:32:34.739 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv\n", + "2024-12-14 23:32:34.739 | 
INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_datetimeevents.parquet\n", + "2024-12-14 23:32:34.740 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:34.740011. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:34.740 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/proc_datetimeevents.csv\n", + "2024-12-14 23:32:34.741 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:34.744 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 23:32:34.744 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 23:32:34.746 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 23:32:34.746 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). 
Casting.\n", + "2024-12-14 23:32:34.747 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/proc_datetimeevents.parquet\n", + "2024-12-14 23:32:34.753 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013442\n", + "2024-12-14 23:32:34.753 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_datetimeevents.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/.proc_datetimeevents.parquet_cache/locks/2024-12-14T23:32:34.740011.json\n", + "2024-12-14 23:32:34.754 | DEBUG | MEDS_transforms.extract.utils:get_supported_fp:126 - Found file: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet\n", + "2024-12-14 23:32:34.754 | INFO | MEDS_transforms.extract.extract_code_metadata:main:391 - Extracting metadata from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_diagnoses.parquet\n", + "2024-12-14 23:32:34.755 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:34.754918. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:34.755 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/pre_meds/hosp/d_icd_diagnoses.parquet\n", + "2024-12-14 23:32:34.755 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:34.757 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'description' must be of type String. Casting.\n", + "2024-12-14 23:32:34.757 | WARNING | MEDS_transforms.extract.extract_code_metadata:extract_metadata:185 - Metadata column 'parent_codes' must be of type List(String). Casting.\n", + "2024-12-14 23:32:34.757 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/d_icd_diagnoses.parquet\n", + "2024-12-14 23:32:34.835 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.080673\n", + "2024-12-14 23:32:34.835 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_diagnoses.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/hosp/.d_icd_diagnoses.parquet_cache/locks/2024-12-14T23:32:34.754918.json\n", + "2024-12-14 23:32:34.836 | INFO | MEDS_transforms.extract.extract_code_metadata:main:398 - Extracted metadata for all events. Merging.\n", + "2024-12-14 23:32:34.836 | INFO | MEDS_transforms.extract.extract_code_metadata:main:404 - Starting reduction process\n", + "2024-12-14 23:32:34.836 | INFO | MEDS_transforms.extract.extract_code_metadata:main:412 - All map shards complete! 
Starting code metadata reduction computation.\n", + "2024-12-14 23:32:34.868 | INFO | MEDS_transforms.extract.extract_code_metadata:main:424 - Collected metadata for 2661 unique codes among 42898 total observations.\n", + "2024-12-14 23:32:34.952 | INFO | MEDS_transforms.extract.extract_code_metadata:main:449 - Finished reduction in 0:00:00.116083\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:36.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:36.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:37.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 23:32:35.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:35.140\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_metadata --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:36.145\u001b[0m | \u001b[1mINFO \u001b[0m 
| \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:37.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 17:15:36.869 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_metadata with the following configuration:\n", + "\u001b[32m2024-12-14 23:32:36.145\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 23:32:35.869 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_metadata with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -3244,36 +3244,36 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 17:15:36.888 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 23:32:35.895 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "is_metadata: true\n", "do_retype: true\n", - "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", - "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/finalize_MEDS_metadata\n", - "reducer_output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", + "metadata_input_dir: 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/finalize_MEDS_metadata\n", + "reducer_output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", "train_only: true\n", "\n", "Paths: (checkbox indicates if it exists)\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", - " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/finalize_MEDS_metadata\n", - " - metadata_input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", - "2024-12-14 17:15:36.892 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:170 - Validating code metadata\n", - "2024-12-14 17:15:36.892 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:173 - Reading code metadata from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/codes.parquet\n", - "2024-12-14 17:15:37.079 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:183 - Writing finalized metadata df to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/codes.parquet\n", - "2024-12-14 17:15:37.091 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:187 - Creating dataset metadata\n", - "2024-12-14 17:15:37.093 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:199 - Writing finalized dataset metadata to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/dataset.json\n", - "2024-12-14 17:15:37.094 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:204 - Creating subject splits from {str(shards_map_fp.resolve())}\n", - "2024-12-14 17:15:37.094 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split train has 50 subjects\n", - "2024-12-14 17:15:37.094 | INFO | 
MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split tuning has 25 subjects\n", - "2024-12-14 17:15:37.094 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split held_out has 25 subjects\n", - "2024-12-14 17:15:37.096 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:222 - Writing finalized subject splits to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/subject_splits.parquet\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/finalize_MEDS_metadata\n", + " - metadata_input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata\n", + "2024-12-14 23:32:35.901 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:170 - Validating code metadata\n", + "2024-12-14 23:32:35.901 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:173 - Reading code metadata from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/extract_code_metadata/codes.parquet\n", + "2024-12-14 23:32:36.023 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:183 - Writing finalized metadata df to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/codes.parquet\n", + "2024-12-14 23:32:36.046 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:187 - Creating dataset metadata\n", + "2024-12-14 23:32:36.049 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:199 - Writing finalized dataset metadata to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/dataset.json\n", + "2024-12-14 23:32:36.050 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:204 - Creating subject splits from {str(shards_map_fp.resolve())}\n", + "2024-12-14 23:32:36.050 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split train has 50 
subjects\n", + "2024-12-14 23:32:36.051 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split tuning has 25 subjects\n", + "2024-12-14 23:32:36.051 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split held_out has 25 subjects\n", + "2024-12-14 23:32:36.051 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:222 - Writing finalized subject splits to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/subject_splits.parquet\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:37.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_data\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:37.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_data --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_data\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:38.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 23:32:36.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_data\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:36.164\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_data --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/MIMIC-IV_Example/configs --config-name=extract_MIMIC 
'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_data\u001b[0m\n", + "\u001b[32m2024-12-14 23:32:37.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:15:38.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 17:15:37.714 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_data with the following configuration:\n", + "\u001b[32m2024-12-14 23:32:37.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 23:32:36.831 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_data with the following configuration:\n", "input_dir: ${oc.env:MIMICIV_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:MIMICIV_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", @@ -3342,41 +3342,41 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 17:15:37.732 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 23:32:36.858 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "do_retype: true\n", "is_metadata: false\n", - "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", - "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", "reducer_output_dir: null\n", "\n", "Paths: (checkbox indicates if it exists)\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", - " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", - " - metadata_input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", - "2024-12-14 17:15:37.749 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", - "2024-12-14 17:15:37.758 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\n", - "2024-12-14 17:15:37.759 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - 
Registered lock at 2024-12-14 17:15:37.759430. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:37.760 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", - "2024-12-14 17:15:37.761 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:37.862 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\n", - "2024-12-14 17:15:37.899 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.140464\n", - "2024-12-14 17:15:37.900 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/.0.parquet_cache/locks/2024-12-14T17:15:37.759430.json\n", - "2024-12-14 17:15:37.900 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\n", - "2024-12-14 17:15:37.900 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:37.900755. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:37.901 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 17:15:37.901 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:37.915 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\n", - "2024-12-14 17:15:37.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.078518\n", - "2024-12-14 17:15:37.979 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/.0.parquet_cache/locks/2024-12-14T17:15:37.900755.json\n", - "2024-12-14 17:15:37.979 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\n", - "2024-12-14 17:15:37.980 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:15:37.980261. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:15:37.980 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 17:15:37.980 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:15:38.009 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\n", - "2024-12-14 17:15:38.139 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.159248\n", - "2024-12-14 17:15:38.139 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/.0.parquet_cache/locks/2024-12-14T17:15:37.980261.json\n", - "2024-12-14 17:15:38.139 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.407567\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - metadata_input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", + "2024-12-14 23:32:36.885 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", + "2024-12-14 23:32:36.898 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\n", + "2024-12-14 23:32:36.899 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:36.899476. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:36.901 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 23:32:36.901 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:36.994 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\n", + "2024-12-14 23:32:37.052 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.152918\n", + "2024-12-14 23:32:37.052 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/.0.parquet_cache/locks/2024-12-14T23:32:36.899476.json\n", + "2024-12-14 23:32:37.053 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\n", + "2024-12-14 23:32:37.053 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:37.053698. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:37.054 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 23:32:37.054 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:37.091 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\n", + "2024-12-14 23:32:37.241 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.187855\n", + "2024-12-14 23:32:37.241 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/.0.parquet_cache/locks/2024-12-14T23:32:37.053698.json\n", + "2024-12-14 23:32:37.242 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\n", + "2024-12-14 23:32:37.243 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:32:37.242937. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:32:37.243 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 23:32:37.243 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:32:37.297 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\n", + "2024-12-14 23:32:37.562 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.319642\n", + "2024-12-14 23:32:37.562 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/.0.parquet_cache/locks/2024-12-14T23:32:37.242937.json\n", + "2024-12-14 23:32:37.564 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.705197\n", "\u001b[0m\n" ] } @@ -3396,7 +3396,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3439,7 +3439,7 @@ "└────────────┴─────────────────────┴───────────────────┴───────────────┘" ] }, - "execution_count": 43, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -3455,7 +3455,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3469,7 +3469,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/var/folders/fm/gwbmjrfx6gxg6x7xxcxbnrth0000gn/T/ipykernel_61345/1492712476.py:5: DeprecationWarning: `GroupBy.count` is deprecated. 
It has been renamed to `len`.\n", + "/tmp/ipykernel_945876/1492712476.py:5: DeprecationWarning: `GroupBy.count` is deprecated. It has been renamed to `len`.\n", " icd10_events.group_by('code').count().sort('count', descending=True)\n" ] }, @@ -3483,7 +3483,7 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (734, 2)
codecount
stru32
"DIAGNOSIS//ICD//10//E785"57
"DIAGNOSIS//ICD//10//E039"47
"DIAGNOSIS//ICD//10//Z794"37
"DIAGNOSIS//ICD//10//Z87891"35
"DIAGNOSIS//ICD//10//I2510"33
"DIAGNOSIS//ICD//10//Z792"1
"DIAGNOSIS//ICD//10//E041"1
"DIAGNOSIS//ICD//10//Z4502"1
"DIAGNOSIS//ICD//10//I2699"1
"DIAGNOSIS//ICD//10//J942"1
" + "shape: (734, 2)
codecount
stru32
"DIAGNOSIS//ICD//10//E785"57
"DIAGNOSIS//ICD//10//E039"47
"DIAGNOSIS//ICD//10//Z794"37
"DIAGNOSIS//ICD//10//Z87891"35
"DIAGNOSIS//ICD//10//I2510"33
"DIAGNOSIS//ICD//10//L723"1
"DIAGNOSIS//ICD//10//Y828"1
"DIAGNOSIS//ICD//10//F1011"1
"DIAGNOSIS//ICD//10//R1013"1
"DIAGNOSIS//ICD//10//L97429"1
" ], "text/plain": [ "shape: (734, 2)\n", @@ -3498,15 +3498,15 @@ "│ DIAGNOSIS//ICD//10//Z87891 ┆ 35 │\n", "│ DIAGNOSIS//ICD//10//I2510 ┆ 33 │\n", "│ … ┆ … │\n", - "│ DIAGNOSIS//ICD//10//Z792 ┆ 1 │\n", - "│ DIAGNOSIS//ICD//10//E041 ┆ 1 │\n", - "│ DIAGNOSIS//ICD//10//Z4502 ┆ 1 │\n", - "│ DIAGNOSIS//ICD//10//I2699 ┆ 1 │\n", - "│ DIAGNOSIS//ICD//10//J942 ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//L723 ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//Y828 ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//F1011 ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//R1013 ┆ 1 │\n", + "│ DIAGNOSIS//ICD//10//L97429 ┆ 1 │\n", "└────────────────────────────┴───────┘" ] }, - "execution_count": 44, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -3521,7 +3521,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3541,45 +3541,43 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (2_661, 6)
codedescriptionparent_codesitemidpossibly_cpt_codevalueuom
strstrlist[str]list[str]list[str]list[str]
"DIAGNOSIS//ICD//10//E873""Alkalosis"["ICD10CM/E87.3"][null][null][null]
"DIAGNOSIS//ICD//10//Z85810""Personal history of malignant …["ICD10CM/Z85.810"][null][null][null]
"LAB//50883//mg/dL""Bilirubin.direct [Mass/volume]…["LOINC/1968-7"]["50883"][null]["mg/dL"]
"LAB//51000//mg/dL""Triglyceride [Mass/volume] in …["LOINC/2571-8"]["51000"][null]["mg/dL"]
"INFUSION_END//227531""mannitol Injection"["RxNorm/1791383"]["227531"][null][null]
"LAB//51501//#/hpf""Transitional cells [#/area] in…["LOINC/30089-7"]["51501"][null]["#/hpf"]
"DIAGNOSIS//ICD//9//V600""Lack of housing"["ICD9CM/V60.0"][null][null][null]
"PROCEDURE//START//225400""Bronchoscopy"["SNOMED/10847001"]["225400"][null][null]
"LAB//50991//ng/mL""Thyroglobulin [Mass/volume] in…["LOINC/3013-0"]["50991"][null]["ng/mL"]
"LAB//51296//N/A""Dacrocytes [Presence] in Blood…["LOINC/7791-7"]["51296"][null]["N/A"]
" + "shape: (2_661, 6)
codedescriptionparent_codespossibly_cpt_codeitemidvalueuom
strstrlist[str]list[str]list[str]list[str]
"PROCEDURE//ICD//10//0BB88ZX""Excision of Left Upper Lobe Br…["ICD10PCS/0BB88ZX"][null][null][null]
"INFUSION_START//225152""heparin Injection"["RxNorm/1857598"][null]["225152"][null]
"DIAGNOSIS//ICD//10//I472""Ventricular tachycardia"["ICD10CM/I47.2"][null][null][null]
"DIAGNOSIS//ICD//9//27542""Hypercalcemia"["ICD9CM/275.42"][null][null][null]
"PROCEDURE//ICD//10//3E04305""Introduction of Other Antineop…["ICD10PCS/3E04305"][null][null][null]
"PROCEDURE//START//225479""Thoracentesis"["SNOMED/91602002"][null]["225479"][null]
"INFUSION_END//225865""doxycycline Injection"["RxNorm/1652652"][null]["225865"][null]
"LAB//50950//mg/dL""IgG [Mass/volume] in Serum or …["LOINC/2465-3"][null]["50950"]["mg/dL"]
"LAB//50954//IU/L""Lactate dehydrogenase [Enzymat…["LOINC/2532-0"][null]["50954"]["IU/L"]
"DIAGNOSIS//ICD//10//E039""Hypothyroidism, unspecified"["ICD10CM/E03.9"][null][null][null]
" ], "text/plain": [ "shape: (2_661, 6)\n", - "┌──────────────────┬──────────────────┬─────────────────┬────────────┬─────────────────┬───────────┐\n", - "│ code ┆ description ┆ parent_codes ┆ itemid ┆ possibly_cpt_co ┆ valueuom │\n", - "│ --- ┆ --- ┆ --- ┆ --- ┆ de ┆ --- │\n", - "│ str ┆ str ┆ list[str] ┆ list[str] ┆ --- ┆ list[str] │\n", - "│ ┆ ┆ ┆ ┆ list[str] ┆ │\n", - "╞══════════════════╪══════════════════╪═════════════════╪════════════╪═════════════════╪═══════════╡\n", - "│ DIAGNOSIS//ICD// ┆ Alkalosis ┆ [\"ICD10CM/E87.3 ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 10//E873 ┆ ┆ \"] ┆ ┆ ┆ │\n", - "│ DIAGNOSIS//ICD// ┆ Personal history ┆ [\"ICD10CM/Z85.8 ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 10//Z85810 ┆ of malignant … ┆ 10\"] ┆ ┆ ┆ │\n", - "│ LAB//50883//mg/d ┆ Bilirubin.direct ┆ [\"LOINC/1968-7\" ┆ [\"50883\"] ┆ [null] ┆ [\"mg/dL\"] │\n", - "│ L ┆ [Mass/volume]… ┆ ] ┆ ┆ ┆ │\n", - "│ LAB//51000//mg/d ┆ Triglyceride ┆ [\"LOINC/2571-8\" ┆ [\"51000\"] ┆ [null] ┆ [\"mg/dL\"] │\n", - "│ L ┆ [Mass/volume] in ┆ ] ┆ ┆ ┆ │\n", - "│ ┆ … ┆ ┆ ┆ ┆ │\n", - "│ INFUSION_END//22 ┆ mannitol ┆ [\"RxNorm/179138 ┆ [\"227531\"] ┆ [null] ┆ [null] │\n", - "│ 7531 ┆ Injection ┆ 3\"] ┆ ┆ ┆ │\n", - "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", - "│ LAB//51501//#/hp ┆ Transitional ┆ [\"LOINC/30089-7 ┆ [\"51501\"] ┆ [null] ┆ [\"#/hpf\"] │\n", - "│ f ┆ cells [#/area] ┆ \"] ┆ ┆ ┆ │\n", - "│ ┆ in… ┆ ┆ ┆ ┆ │\n", - "│ DIAGNOSIS//ICD// ┆ Lack of housing ┆ [\"ICD9CM/V60.0\" ┆ [null] ┆ [null] ┆ [null] │\n", - "│ 9//V600 ┆ ┆ ] ┆ ┆ ┆ │\n", - "│ PROCEDURE//START ┆ Bronchoscopy ┆ [\"SNOMED/108470 ┆ [\"225400\"] ┆ [null] ┆ [null] │\n", - "│ //225400 ┆ ┆ 01\"] ┆ ┆ ┆ │\n", - "│ LAB//50991//ng/m ┆ Thyroglobulin ┆ [\"LOINC/3013-0\" ┆ [\"50991\"] ┆ [null] ┆ [\"ng/mL\"] │\n", - "│ L ┆ [Mass/volume] ┆ ] ┆ ┆ ┆ │\n", - "│ ┆ in… ┆ ┆ ┆ ┆ │\n", - "│ LAB//51296//N/A ┆ Dacrocytes ┆ [\"LOINC/7791-7\" ┆ [\"51296\"] ┆ [null] ┆ [\"N/A\"] │\n", - "│ ┆ [Presence] in ┆ ] ┆ ┆ ┆ │\n", - "│ ┆ Blood… ┆ ┆ ┆ ┆ │\n", - 
"└──────────────────┴──────────────────┴─────────────────┴────────────┴─────────────────┴───────────┘" + "┌──────────────────┬──────────────────┬─────────────────┬─────────────────┬────────────┬───────────┐\n", + "│ code ┆ description ┆ parent_codes ┆ possibly_cpt_co ┆ itemid ┆ valueuom │\n", + "│ --- ┆ --- ┆ --- ┆ de ┆ --- ┆ --- │\n", + "│ str ┆ str ┆ list[str] ┆ --- ┆ list[str] ┆ list[str] │\n", + "│ ┆ ┆ ┆ list[str] ┆ ┆ │\n", + "╞══════════════════╪══════════════════╪═════════════════╪═════════════════╪════════════╪═══════════╡\n", + "│ PROCEDURE//ICD// ┆ Excision of Left ┆ [\"ICD10PCS/0BB8 ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 10//0BB88ZX ┆ Upper Lobe Br… ┆ 8ZX\"] ┆ ┆ ┆ │\n", + "│ INFUSION_START// ┆ heparin ┆ [\"RxNorm/185759 ┆ [null] ┆ [\"225152\"] ┆ [null] │\n", + "│ 225152 ┆ Injection ┆ 8\"] ┆ ┆ ┆ │\n", + "│ DIAGNOSIS//ICD// ┆ Ventricular ┆ [\"ICD10CM/I47.2 ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 10//I472 ┆ tachycardia ┆ \"] ┆ ┆ ┆ │\n", + "│ DIAGNOSIS//ICD// ┆ Hypercalcemia ┆ [\"ICD9CM/275.42 ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 9//27542 ┆ ┆ \"] ┆ ┆ ┆ │\n", + "│ PROCEDURE//ICD// ┆ Introduction of ┆ [\"ICD10PCS/3E04 ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 10//3E04305 ┆ Other Antineop… ┆ 305\"] ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ PROCEDURE//START ┆ Thoracentesis ┆ [\"SNOMED/916020 ┆ [null] ┆ [\"225479\"] ┆ [null] │\n", + "│ //225479 ┆ ┆ 02\"] ┆ ┆ ┆ │\n", + "│ INFUSION_END//22 ┆ doxycycline ┆ [\"RxNorm/165265 ┆ [null] ┆ [\"225865\"] ┆ [null] │\n", + "│ 5865 ┆ Injection ┆ 2\"] ┆ ┆ ┆ │\n", + "│ LAB//50950//mg/d ┆ IgG ┆ [\"LOINC/2465-3\" ┆ [null] ┆ [\"50950\"] ┆ [\"mg/dL\"] │\n", + "│ L ┆ [Mass/volume] in ┆ ] ┆ ┆ ┆ │\n", + "│ ┆ Serum or … ┆ ┆ ┆ ┆ │\n", + "│ LAB//50954//IU/L ┆ Lactate ┆ [\"LOINC/2532-0\" ┆ [null] ┆ [\"50954\"] ┆ [\"IU/L\"] │\n", + "│ ┆ dehydrogenase ┆ ] ┆ ┆ ┆ │\n", + "│ ┆ [Enzymat… ┆ ┆ ┆ ┆ │\n", + "│ DIAGNOSIS//ICD// ┆ Hypothyroidism, ┆ [\"ICD10CM/E03.9 ┆ [null] ┆ [null] ┆ [null] │\n", + "│ 10//E039 ┆ unspecified ┆ \"] ┆ ┆ ┆ │\n", + 
"└──────────────────┴──────────────────┴─────────────────┴─────────────────┴────────────┴───────────┘" ] }, - "execution_count": 45, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -3591,14 +3589,34 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 11, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/pty.py:95: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "In addition, using fork() with Python in general is a recipe for mysterious\n", + "deadlocks and crashes.\n", + "\n", + "The most likely reason you are seeing this error is because you are using the\n", + "multiprocessing module on Linux, which uses fork() by default. This will be\n", + "fixed in Python 3.14. Until then, you want to use the \"spawn\" context instead.\n", + "\n", + "See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.\n", + "\n", + "If you really know what your doing, you can silence this warning with the warning module\n", + "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", + "\n", + " pid, fd = os.forkpty()\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/\n" + "/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/\n" ] } ], @@ -3617,19 +3635,19 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Cloning into '/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo//tmp'...\n", + "Cloning into '/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo//tmp'...\n", "remote: Enumerating objects: 144, done.\u001b[K\n", "remote: Counting objects: 100% (144/144), done.\u001b[K\n", "remote: Compressing objects: 100% (129/129), done.\u001b[K\n", "remote: Total 144 (delta 22), reused 
70 (delta 7), pack-reused 0 (from 0)\u001b[K\n", - "Receiving objects: 100% (144/144), 211.41 KiB | 478.00 KiB/s, done.\n", + "Receiving objects: 100% (144/144), 211.41 KiB | 7.29 MiB/s, done.\n", "Resolving deltas: 100% (22/22), done.\n" ] } @@ -3647,7 +3665,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -3656,14 +3674,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo//eICU_Example\n", + "/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo//eICU_Example\n", "Note that eICU has a lot more observations per subject than does MIMIC-IV, so to keep to a reasonable \n", "memory burden (e.g., < 150GB per worker), you will want a smaller shard size, as well as to turn off \n", "the final unique check (which should not be necessary given the structure of eICU and is expensive) \n", @@ -3674,8 +3692,8 @@ "Additionally, consider reducing N_PARALLEL_WORKERS if > 1\n", "Skipping unzipping.\n", "Running pre-MEDS conversion.\n", - "\u001b[32m2024-12-14 17:20:07.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m278\u001b[0m - \u001b[1mLoading table preprocessors from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/table_preprocessors.yaml...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for admissionDx:\n", + "\u001b[32m2024-12-14 23:38:40.901\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m278\u001b[0m - \u001b[1mLoading table preprocessors from 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/table_preprocessors.yaml...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:40.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for admissionDx:\n", "offset_col: admitdxenteredoffset\n", "pseudotime_col: admitDxEnteredTimestamp\n", "output_data_cols:\n", @@ -3685,7 +3703,7 @@ "- How should we use `admitdxtest`?\n", "- How should we use `admitdxpath`?\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for allergy:\n", + "\u001b[32m2024-12-14 23:38:40.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for allergy:\n", "offset_col: allergyenteredoffset\n", "pseudotime_col: allergyEnteredTimestamp\n", "output_data_cols:\n", @@ -3698,21 +3716,21 @@ "- Is `drugName` the name of the drug to which the patient is allergic or the drug\n", " given to the patient (docs say 'name of the selected admission drug')?\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanGeneral:\n", + "\u001b[32m2024-12-14 23:38:40.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanGeneral:\n", "offset_col: cplitemoffset\n", "pseudotime_col: carePlanGeneralItemEnteredTimestamp\n", "output_data_cols:\n", "- cplgroup\n", "- cplitemvalue\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding 
preprocessor for carePlanEOL:\n", + "\u001b[32m2024-12-14 23:38:40.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanEOL:\n", "offset_col: cpleoldiscussionoffset\n", "pseudotime_col: carePlanEolDiscussionOccurredTimestamp\n", "warning_items:\n", "- Is the DiscussionOffset time actually reliable? Should we fall back on the SaveOffset\n", " time?\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanGoal:\n", + "\u001b[32m2024-12-14 23:38:40.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanGoal:\n", "offset_col: cplgoaloffset\n", "pseudotime_col: carePlanGoalEnteredTimestamp\n", "output_data_cols:\n", @@ -3720,7 +3738,7 @@ "- cplgoalvalue\n", "- cplgoalstatus\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanInfectiousDisease:\n", + "\u001b[32m2024-12-14 23:38:40.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for carePlanInfectiousDisease:\n", "offset_col: cplinfectdiseaseoffset\n", "pseudotime_col: carePlanInfectDiseaseEnteredTimestamp\n", "output_data_cols:\n", @@ -3729,7 +3747,7 @@ "- responsetotherapy\n", "- treatment\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for diagnosis:\n", + "\u001b[32m2024-12-14 23:38:40.928\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for diagnosis:\n", "offset_col: diagnosisoffset\n", "pseudotime_col: diagnosisEnteredTimestamp\n", "output_data_cols:\n", @@ -3740,7 +3758,7 @@ "- Though we use it, the `diagnosisString` field documentation is unclear -- by what\n", " is it separated?\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for infusionDrug:\n", + "\u001b[32m2024-12-14 23:38:40.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for infusionDrug:\n", "offset_col: infusionoffset\n", "pseudotime_col: infusionEnteredTimestamp\n", "output_data_cols:\n", @@ -3752,7 +3770,7 @@ "- volumeoffluid\n", "- patientweight\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for lab:\n", + "\u001b[32m2024-12-14 23:38:40.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for lab:\n", "offset_col: labresultoffset\n", "pseudotime_col: labResultDrawnTimestamp\n", "output_data_cols:\n", @@ -3766,7 +3784,7 @@ "- Is this the time the lab was drawn? Entered? 
The time the result came in?\n", "- We **IGNORE** the `labResultRevisedOffset` column -- this may be a mistake!\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for medication:\n", + "\u001b[32m2024-12-14 23:38:40.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for medication:\n", "offset_col:\n", "- drugorderoffset\n", "- drugstartoffset\n", @@ -3789,7 +3807,7 @@ "warning_items:\n", "- We **IGNORE** the `drugOrderCancelled` column -- this may be a mistake!\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.094\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseAssessment:\n", + "\u001b[32m2024-12-14 23:38:40.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseAssessment:\n", "offset_col:\n", "- nurseassessoffset\n", "- nurseassessentryoffset\n", @@ -3805,7 +3823,7 @@ "- Should we be using `cellAttributePath` instead of `cellAttribute`?\n", "- SOME MAY BE LISTS\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.094\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseCare:\n", + "\u001b[32m2024-12-14 23:38:40.932\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseCare:\n", "offset_col:\n", "- nursecareoffset\n", "- nursecareentryoffset\n", @@ -3821,7 +3839,7 @@ "- Should we be using `cellAttributePath` instead of `cellAttribute`?\n", "- SOME MAY BE LISTS\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 
17:20:07.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseCharting:\n", + "\u001b[32m2024-12-14 23:38:40.932\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for nurseCharting:\n", "offset_col:\n", "- nursingchartoffset\n", "- nursingchartentryoffset\n", @@ -3837,7 +3855,7 @@ "warning_items:\n", "- SOME MAY BE LISTS\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for pastHistory:\n", + "\u001b[32m2024-12-14 23:38:40.933\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for pastHistory:\n", "offset_col:\n", "- pasthistoryoffset\n", "- pasthistoryenteredoffset\n", @@ -3855,7 +3873,7 @@ "- How should we use `pastHistoryPath` vs. `pastHistoryNoteType`?\n", "- How should we use `pastHistoryValue` vs. `pastHistoryValueText`?\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for physicalExam:\n", + "\u001b[32m2024-12-14 23:38:40.934\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for physicalExam:\n", "offset_col: physicalexamoffset\n", "pseudotime_col: physicalExamEnteredTimestamp\n", "output_data_cols:\n", @@ -3867,7 +3885,7 @@ "- How should we use `physicalExamValue` vs. `physicalExamText`?\n", "- I believe the `physicalExamValue` is a **LIST**. 
This must be processed specially.\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for respiratoryCare:\n", + "\u001b[32m2024-12-14 23:38:40.935\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for respiratoryCare:\n", "offset_col:\n", "- respcarestatusoffset\n", "- ventstartoffset\n", @@ -3910,7 +3928,7 @@ "- We might be able to use `priorVent` timestamps to further refine true season of\n", " unit admission.\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for respiratoryCharting:\n", + "\u001b[32m2024-12-14 23:38:40.935\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for respiratoryCharting:\n", "offset_col:\n", "- respchartoffset\n", "- respchartentryoffset\n", @@ -3925,7 +3943,7 @@ "warning_items:\n", "- SOME MAY BE LISTS\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for treatment:\n", + "\u001b[32m2024-12-14 23:38:40.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for treatment:\n", "offset_col: treatmentoffset\n", "pseudotime_col: treatmentEnteredTimestamp\n", "output_data_cols:\n", @@ -3934,7 +3952,7 @@ "warning_items:\n", "- Absence of entries in table do not indicate absence of treatments\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.098\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for vitalAperiodic:\n", + "\u001b[32m2024-12-14 23:38:40.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for vitalAperiodic:\n", "offset_col: observationoffset\n", "pseudotime_col: observationEnteredTimestamp\n", "output_data_cols:\n", @@ -3950,7 +3968,7 @@ "- pvr\n", "- pvri\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for vitalPeriodic:\n", + "\u001b[32m2024-12-14 23:38:40.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m282\u001b[0m - \u001b[1m Adding preprocessor for vitalPeriodic:\n", "offset_col: observationoffset\n", "pseudotime_col: observationEnteredTimestamp\n", "output_data_cols:\n", @@ -3974,141 +3992,141 @@ "warning_items:\n", "- These are 5-minute median values. 
There are going to be a *lot* of events.\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m294\u001b[0m - \u001b[1mProcessing patient table first...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.099\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m298\u001b[0m - \u001b[1mLoading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/hospital.csv.gz...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.105\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m302\u001b[0m - \u001b[1mLoading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/patient.csv.gz...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m305\u001b[0m - \u001b[1mProcessing patient table...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_patient\u001b[0m:\u001b[36m101\u001b[0m - \u001b[1mChecking that the 24h times are consistent. 
If this is extremely slow, consider refactoring to have only one `.collect()` call.\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_timestamps_agree\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mChecking that stated 24h times are consistent given offsets between {pseudotime_col.name} and hospitaldischargetime24...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_timestamps_agree\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mChecking that stated 24h times are consistent given offsets between {pseudotime_col.name} and hospitaladmittime24...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_timestamps_agree\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mChecking that stated 24h times are consistent given offsets between {pseudotime_col.name} and unitadmittime24...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.132\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_timestamps_agree\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mChecking that stated 24h times are consistent given offsets between {pseudotime_col.name} and unitdischargetime24...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_patient\u001b[0m:\u001b[36m109\u001b[0m - \u001b[1mValidated 24h times in 0:00:00.017329\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.135\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_patient\u001b[0m:\u001b[36m111\u001b[0m - \u001b[33m\u001b[1mNOT validating the `unitVisitNumber` column as that isn't implemented yet.\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.135\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_patient\u001b[0m:\u001b[36m113\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING. Check with the eICU team:\n", + "\u001b[32m2024-12-14 23:38:40.938\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m294\u001b[0m - \u001b[1mProcessing patient table first...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:40.938\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m298\u001b[0m - \u001b[1mLoading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/hospital.csv.gz...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:40.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m302\u001b[0m - \u001b[1mLoading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/patient.csv.gz...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:40.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m305\u001b[0m - \u001b[1mProcessing patient table...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:40.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_patient\u001b[0m:\u001b[36m101\u001b[0m - \u001b[1mChecking that the 24h times are consistent. 
If this is extremely slow, consider refactoring to have only one `.collect()` call.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:40.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_timestamps_agree\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mChecking that stated 24h times are consistent given offsets between {pseudotime_col.name} and hospitaldischargetime24...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:40.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_timestamps_agree\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mChecking that stated 24h times are consistent given offsets between {pseudotime_col.name} and hospitaladmittime24...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:40.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_timestamps_agree\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mChecking that stated 24h times are consistent given offsets between {pseudotime_col.name} and unitadmittime24...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:40.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_timestamps_agree\u001b[0m:\u001b[36m57\u001b[0m - \u001b[1mChecking that stated 24h times are consistent given offsets between {pseudotime_col.name} and unitdischargetime24...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_patient\u001b[0m:\u001b[36m109\u001b[0m - \u001b[1mValidated 24h times in 0:00:00.032635\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.008\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mprocess_patient\u001b[0m:\u001b[36m111\u001b[0m - \u001b[33m\u001b[1mNOT validating the `unitVisitNumber` column as that isn't implemented yet.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.008\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mprocess_patient\u001b[0m:\u001b[36m113\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING. Check with the eICU team:\n", " - `apacheAdmissionDx` is not selected from the patients table as we grab it from `admissiondx`. Is this right?\n", " - `admissionHeight` and `admissionWeight` are interpreted as **unit** admission height/weight, not hospital admission height/weight. Is this right?\n", " - `age` is interpreted as the age at the time of the unit stay, not the hospital stay. Is this right?\n", " - `What is the actual mean age for those > 89? Here we assume 90.\n", " - Note that all the column names appear to be all in lowercase for the csv versions, vs. the docs\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.163\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing diagnosis...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.184\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/diagnosis.csv.gz in 0:00:00.021635\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.184\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for diagnosis table. 
Check with the eICU team:\n", - " - Though we use it, the `diagnosisString` field documentation is unclear -- by what is it separated?\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet in 0:00:00.038430\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing vitalAperiodic...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.360\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/vitalAperiodic.csv.gz in 0:00:00.158846\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet in 0:00:00.228263\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing admissionDx...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/admissionDx.csv.gz in 0:00:00.007296\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.437\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT 
THE FOLLOWING for admissionDx table. Check with the eICU team:\n", - " - How should we use `admitdxtest`?\n", - " - How should we use `admitdxpath`?\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet in 0:00:00.019188\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing respiratoryCare...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/respiratoryCare.csv.gz in 0:00:00.009363\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.459\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for respiratoryCare table. 
Check with the eICU team:\n", - " - We ignore the `priorVent*` columns -- this may be a mistake!\n", - " - There is a lot of data in this table -- what should be incorporated into the event structure?\n", - " - We might be able to use `priorVent` timestamps to further refine true season of unit admission.\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet in 0:00:00.022688\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing nurseAssessment...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.560\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/nurseAssessment.csv.gz in 0:00:00.087465\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.560\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for nurseAssessment table. 
Check with the eICU team:\n", - " - Should we be using `cellAttributePath` instead of `cellAttribute`?\n", - " - SOME MAY BE LISTS\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet in 0:00:00.120698\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:07.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing vitalPeriodic...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:08.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/vitalPeriodic.csv.gz in 0:00:01.244370\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:08.838\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for vitalPeriodic table. Check with the eICU team:\n", - " - These are 5-minute median values. 
There are going to be a *lot* of events.\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet in 0:00:01.629149\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing carePlanGeneral...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.244\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/carePlanGeneral.csv.gz in 0:00:00.021348\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.260\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet in 0:00:00.037551\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.260\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing carePlanGoal...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/carePlanGoal.csv.gz in 0:00:00.003226\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet in 
0:00:00.012718\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing treatment...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/treatment.csv.gz in 0:00:00.023153\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.296\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for treatment table. Check with the eICU team:\n", + "\u001b[32m2024-12-14 23:38:41.054\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing carePlanInfectiousDisease...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.055\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/carePlanInfectiousDisease.csv.gz in 0:00:00.000948\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet in 0:00:00.015875\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing treatment...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.136\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/treatment.csv.gz in 0:00:00.066012\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.137\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for treatment table. Check with the eICU team:\n", " - Absence of entries in table do not indicate absence of treatments\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet in 0:00:00.046999\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.320\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apacheApsVar as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing carePlanEOL...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/carePlanEOL.csv.gz in 0:00:00.000273\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.321\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for carePlanEOL table. Check with the eICU team:\n", - " - Is the DiscussionOffset time actually reliable? 
Should we fall back on the SaveOffset time?\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet in 0:00:00.008632\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.329\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m331\u001b[0m - \u001b[33m\u001b[1mNo function needed for infusiondrug. For eICU, THIS IS UNEXPECTED\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.329\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping carePlanCareProvider as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.329\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping microLab as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing nurseCare...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/nurseCare.csv.gz in 0:00:00.038466\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.368\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for nurseCare table. 
Check with the eICU team:\n", - " - Should we be using `cellAttributePath` instead of `cellAttribute`?\n", - " - SOME MAY BE LISTS\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.387\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet in 0:00:00.057232\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.387\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing physicalExam...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/physicalExam.csv.gz in 0:00:00.062587\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.450\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for physicalExam table. 
Check with the eICU team:\n", + "\u001b[32m2024-12-14 23:38:41.184\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet in 0:00:00.114126\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.185\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing physicalExam...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/physicalExam.csv.gz in 0:00:00.128952\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.314\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for physicalExam table. Check with the eICU team:\n", " - How should we use `physicalExamValue` vs. `physicalExamText`?\n", " - I believe the `physicalExamValue` is a **LIST**. 
This must be processed specially.\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet in 0:00:00.096029\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing respiratoryCharting...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/respiratoryCharting.csv.gz in 0:00:00.116960\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.600\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for respiratoryCharting table. 
Check with the eICU team:\n", + "\u001b[32m2024-12-14 23:38:41.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet in 0:00:00.210660\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing vitalAperiodic...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/vitalAperiodic.csv.gz in 0:00:00.243733\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet in 0:00:00.388033\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing pastHistory...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/pastHistory.csv.gz in 0:00:00.023773\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.808\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for pastHistory table. 
Check with the eICU team:\n", + " - SOME MAY BE LISTS\n", + " - How should we use `pastHistoryPath` vs. `pastHistoryNoteType`?\n", + " - How should we use `pastHistoryValue` vs. `pastHistoryValueText`?\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet in 0:00:00.067638\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.852\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping carePlanCareProvider as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing diagnosis...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/diagnosis.csv.gz in 0:00:00.049100\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.902\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for diagnosis table. 
Check with the eICU team:\n", + " - Though we use it, the `diagnosisString` field documentation is unclear -- by what is it separated?\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet in 0:00:00.094038\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing carePlanGeneral...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/carePlanGeneral.csv.gz in 0:00:00.049384\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet in 0:00:00.098870\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing nurseAssessment...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/nurseAssessment.csv.gz in 0:00:00.159300\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.206\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE 
FOLLOWING for nurseAssessment table. Check with the eICU team:\n", + " - Should we be using `cellAttributePath` instead of `cellAttribute`?\n", " - SOME MAY BE LISTS\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet in 0:00:00.173772\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.657\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping note as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.657\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping admissiondrug as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:09.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing lab...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:10.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/lab.csv.gz in 0:00:00.387661\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:10.045\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for lab table. Check with the eICU team:\n", - " - Is this the time the lab was drawn? Entered? 
The time the result came in?\n", - " - We **IGNORE** the `labResultRevisedOffset` column -- this may be a mistake!\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:10.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet in 0:00:00.485708\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:10.143\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apachePredVar as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:10.143\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping customLab as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:10.143\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apachePatientResult as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:10.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing carePlanInfectiousDisease...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:10.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/carePlanInfectiousDisease.csv.gz in 0:00:00.000544\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:10.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet in 0:00:00.009958\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:10.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing allergy...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:10.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/allergy.csv.gz in 0:00:00.003649\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:10.157\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for allergy table. Check with the eICU team:\n", + "\u001b[32m2024-12-14 23:38:42.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet in 0:00:00.246346\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.293\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping note as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.293\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping intakeOutput as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.293\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apachePredVar as it is not supported in this pipeline.\u001b[0m\n", + 
"\u001b[32m2024-12-14 23:38:42.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing carePlanGoal...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/carePlanGoal.csv.gz in 0:00:00.005396\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet in 0:00:00.036006\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing allergy...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/allergy.csv.gz in 0:00:00.005835\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.336\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for allergy table. 
Check with the eICU team:\n", " - How should we use `allergyNoteType`?\n", " - How should we use `specialtyType`?\n", " - How should we use `userType`?\n", " - Is `drugName` the name of the drug to which the patient is allergic or the drug given to the patient (docs say 'name of the selected admission drug')?\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:10.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet in 0:00:00.011618\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:10.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing nurseCharting...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:11.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/nurseCharting.csv.gz in 0:00:01.167813\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:11.334\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for nurseCharting table. Check with the eICU team:\n", + "\u001b[32m2024-12-14 23:38:42.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet in 0:00:00.031064\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.361\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m331\u001b[0m - \u001b[33m\u001b[1mNo function needed for infusiondrug. 
For eICU, THIS IS UNEXPECTED\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.362\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing respiratoryCare...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/respiratoryCare.csv.gz in 0:00:00.023569\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.386\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for respiratoryCare table. Check with the eICU team:\n", + " - We ignore the `priorVent*` columns -- this may be a mistake!\n", + " - There is a lot of data in this table -- what should be incorporated into the event structure?\n", + " - We might be able to use `priorVent` timestamps to further refine true season of unit admission.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet in 0:00:00.061466\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing carePlanEOL...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.424\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/carePlanEOL.csv.gz in 0:00:00.000658\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.424\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for carePlanEOL table. Check with the eICU team:\n", + " - Is the DiscussionOffset time actually reliable? Should we fall back on the SaveOffset time?\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet in 0:00:00.015729\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.440\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing respiratoryCharting...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.709\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/respiratoryCharting.csv.gz in 0:00:00.268863\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.709\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for respiratoryCharting table. 
Check with the eICU team:\n", " - SOME MAY BE LISTS\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:11.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet in 0:00:01.540825\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:11.707\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing pastHistory...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:11.719\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/pastHistory.csv.gz in 0:00:00.012306\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:11.719\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for pastHistory table. Check with the eICU team:\n", - " - SOME MAY BE LISTS\n", - " - How should we use `pastHistoryPath` vs. `pastHistoryNoteType`?\n", - " - How should we use `pastHistoryValue` vs. 
`pastHistoryValueText`?\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:11.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet in 0:00:00.027972\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:11.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing medication...\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:11.859\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/medication.csv.gz in 0:00:00.124266\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:11.859\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for medication table. 
Check with the eICU team:\n", + "\u001b[32m2024-12-14 23:38:42.867\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet in 0:00:00.427698\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:42.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing medication...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:43.032\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/medication.csv.gz in 0:00:00.164295\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:43.033\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for medication table. Check with the eICU team:\n", " - We **IGNORE** the `drugOrderCancelled` column -- this may be a mistake!\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:11.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet in 0:00:00.168091\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:11.903\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping intakeOutput as it is not supported in this pipeline.\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:11.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m352\u001b[0m - \u001b[1mDone! 
All dataframes processed and written to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:43.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet in 0:00:00.269026\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:43.137\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping customLab as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:43.137\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping microLab as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:43.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing nurseCare...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:43.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/nurseCare.csv.gz in 0:00:00.071627\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:43.210\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for nurseCare table. 
Check with the eICU team:\n", + " - Should we be using `cellAttributePath` instead of `cellAttribute`?\n", + " - SOME MAY BE LISTS\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:43.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet in 0:00:00.142561\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:43.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing lab...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:44.011\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/lab.csv.gz in 0:00:00.730567\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:44.011\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for lab table. Check with the eICU team:\n", + " - Is this the time the lab was drawn? Entered? 
The time the result came in?\n", + " - We **IGNORE** the `labResultRevisedOffset` column -- this may be a mistake!\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:44.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet in 0:00:00.942354\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:44.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing vitalPeriodic...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:46.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/vitalPeriodic.csv.gz in 0:00:02.124718\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:46.349\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for vitalPeriodic table. Check with the eICU team:\n", + " - These are 5-minute median values. 
There are going to be a *lot* of events.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:47.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet in 0:00:02.893194\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:47.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing admissionDx...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:47.132\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/admissionDx.csv.gz in 0:00:00.014956\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:47.132\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for admissionDx table. 
Check with the eICU team:\n", + " - How should we use `admitdxtest`?\n", + " - How should we use `admitdxpath`?\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:47.183\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet in 0:00:00.065904\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:47.183\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping admissiondrug as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:47.184\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apachePatientResult as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:47.184\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m345\u001b[0m - \u001b[1mProcessing nurseCharting...\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:49.161\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1m * Loaded raw /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/raw_data/eicu-crd-demo/2.0.1/nurseCharting.csv.gz in 0:00:01.977468\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:49.162\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfn\u001b[0m:\u001b[36m206\u001b[0m - \u001b[33m\u001b[1mNOT SURE ABOUT THE FOLLOWING for nurseCharting table. 
Check with the eICU team:\n", + " - SOME MAY BE LISTS\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:49.935\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m350\u001b[0m - \u001b[1m * Processed and wrote to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet in 0:00:02.751533\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:49.936\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m328\u001b[0m - \u001b[33m\u001b[1mSkipping apacheApsVar as it is not supported in this pipeline.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:49.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m352\u001b[0m - \u001b[1mDone! All dataframes processed and written to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds\u001b[0m\n", "Setting N_WORKERS to 1 to avoid issues with the runners.\n", "Running extraction pipeline.\n", - "\u001b[32m2024-12-14 17:20:12.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: shard_events\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:12.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-shard_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=shard_events\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:14.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 23:38:50.702\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: shard_events\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:50.718\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-shard_events --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=shard_events\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:54.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:14.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 17:20:13.013 | INFO | MEDS_transforms.extract.shard_events:main:330 - Running with config:\n", + "\u001b[32m2024-12-14 23:38:54.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 23:38:51.407 | INFO | MEDS_transforms.extract.shard_events:main:330 - Running with config:\n", "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", @@ -4181,463 +4199,463 @@ "Stage config:\n", "row_chunksize: 200000000\n", "infer_schema_length: 999999999\n", - "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo//pre_meds/\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo//pre_meds/\n", "is_metadata: false\n", - "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/metadata\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/metadata\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", "reducer_output_dir: null\n", "\n", - "2024-12-14 17:20:13.017 | INFO | MEDS_transforms.extract.shard_events:main:342 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/event_configs.yaml to identify needed columns.\n", - "2024-12-14 17:20:13.069 | INFO | MEDS_transforms.extract.shard_events:main:368 - Starting event sub-sharding. 
Sub-sharding 20 files:\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet\n", - " * /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet\n", - " * 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet\n", - "2024-12-14 17:20:13.071 | INFO | MEDS_transforms.extract.shard_events:main:372 - Will read raw data from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/$IN_FILE.parquet and write sub-sharded data to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/$IN_FILE/$ROW_START-$ROW_END.parquet\n", - "2024-12-14 17:20:13.073 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL.\n", - "2024-12-14 17:20:13.073 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet to determine row count.\n", - "2024-12-14 17:20:13.075 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:13.076 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.414 | INFO | MEDS_transforms.extract.shard_events:main:342 - Reading event conversion config from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/event_configs.yaml to identify needed columns.\n", + "2024-12-14 23:38:51.488 | INFO | MEDS_transforms.extract.shard_events:main:368 - Starting event sub-sharding. 
Sub-sharding 20 files:\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet\n", + " * /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet\n", + 
"2024-12-14 23:38:51.491 | INFO | MEDS_transforms.extract.shard_events:main:372 - Will read raw data from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/$IN_FILE.parquet and write sub-sharded data to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/$IN_FILE/$ROW_START-$ROW_END.parquet\n", + "2024-12-14 23:38:51.494 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx.\n", + "2024-12-14 23:38:51.495 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet to determine row count.\n", + "2024-12-14 23:38:51.498 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.498 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:13.076 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanEolDiscussionOccurredTimestamp', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:13.082 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanEolDiscussionOccurredTimestamp, patienthealthsystemstayid\n", - "2024-12-14 17:20:13.088 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 15 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet.\n", - "2024-12-14 17:20:13.088 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:13.088 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet row-chunk [0-15) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet.\n", - "2024-12-14 17:20:13.091 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.091462. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:13.093 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet\n", - "2024-12-14 17:20:13.093 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:13.093 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.498 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admissiondxid', 'admitDxEnteredTimestamp', 'admitdxname', 'patienthealthsystemstayid', 'patientunitstayid']\n", + "2024-12-14 23:38:51.500 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admissiondxid, admitDxEnteredTimestamp, admitdxname, patienthealthsystemstayid, patientunitstayid\n", + "2024-12-14 23:38:51.509 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 7578 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet.\n", + "2024-12-14 23:38:51.509 | INFO | MEDS_transforms.extract.shard_events:main:409 - 
Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:51.509 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet row-chunk [0-7578) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet.\n", + "2024-12-14 23:38:51.516 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:51.515992. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:51.519 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet\n", + "2024-12-14 23:38:51.519 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.519 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:13.093 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanEolDiscussionOccurredTimestamp', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:13.093 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanEolDiscussionOccurredTimestamp, patienthealthsystemstayid\n", - "2024-12-14 17:20:13.093 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:13.094 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", - "2024-12-14 17:20:13.101 | 
INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010042\n", - "2024-12-14 17:20:13.101 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/.[0-15).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/.[0-15).parquet_cache/locks/2024-12-14T17:20:13.091462.json\n", - "2024-12-14 17:20:13.104 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting.\n", - "2024-12-14 17:20:13.104 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet to determine row count.\n", - "2024-12-14 17:20:13.106 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:13.106 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.520 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admissiondxid', 'admitDxEnteredTimestamp', 'admitdxname', 'patienthealthsystemstayid', 'patientunitstayid']\n", + "2024-12-14 23:38:51.520 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admissiondxid, admitDxEnteredTimestamp, admitdxname, patienthealthsystemstayid, patientunitstayid\n", + "2024-12-14 23:38:51.520 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 
23:38:51.521 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", + "2024-12-14 23:38:51.536 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.020793\n", + "2024-12-14 23:38:51.537 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/.[0-7578).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/.[0-7578).parquet_cache/locks/2024-12-14T23:38:51.515992.json\n", + "2024-12-14 23:38:51.542 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment.\n", + "2024-12-14 23:38:51.543 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet to determine row count.\n", + "2024-12-14 23:38:51.548 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.548 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:13.106 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['nursingChartEnteredTimestamp', 'nursingChartPerformedTimestamp', 'nursingchartcelltypecat', 'nursingchartcelltypevallabel', 'nursingchartcelltypevalname', 'nursingchartid', 'nursingchartvalue', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:13.107 | DEBUG | 
MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, nursingChartEnteredTimestamp, nursingChartPerformedTimestamp, nursingchartcelltypecat, nursingchartcelltypevallabel, nursingchartcelltypevalname, nursingchartid, nursingchartvalue, patienthealthsystemstayid\n", - "2024-12-14 17:20:13.118 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1477163 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet.\n", - "2024-12-14 17:20:13.118 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:13.118 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet row-chunk [0-1477163) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet.\n", - "2024-12-14 17:20:13.120 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.120700. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:13.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet\n", - "2024-12-14 17:20:13.121 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:13.121 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.549 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'treatmentEnteredTimestamp', 'treatmentid', 'treatmentstring']\n", + "2024-12-14 23:38:51.549 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, treatmentEnteredTimestamp, treatmentid, treatmentstring\n", + "2024-12-14 23:38:51.551 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 38290 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet.\n", + "2024-12-14 23:38:51.551 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:51.551 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet row-chunk [0-38290) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet.\n", + "2024-12-14 23:38:51.557 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:51.556751. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:51.557 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet\n", + "2024-12-14 23:38:51.557 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.558 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:13.121 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['nursingChartEnteredTimestamp', 'nursingChartPerformedTimestamp', 'nursingchartcelltypecat', 'nursingchartcelltypevallabel', 'nursingchartcelltypevalname', 'nursingchartid', 'nursingchartvalue', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:13.121 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, nursingChartEnteredTimestamp, nursingChartPerformedTimestamp, nursingchartcelltypecat, nursingchartcelltypevallabel, nursingchartcelltypevalname, nursingchartid, nursingchartvalue, patienthealthsystemstayid\n", - "2024-12-14 17:20:13.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:13.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", - "2024-12-14 17:20:13.501 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.380454\n", - "2024-12-14 17:20:13.501 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/.[0-1477163).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/.[0-1477163).parquet_cache/locks/2024-12-14T17:20:13.120700.json\n", - "2024-12-14 17:20:13.504 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication.\n", - "2024-12-14 17:20:13.504 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet to determine row count.\n", - "2024-12-14 17:20:13.506 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:13.507 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.558 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'treatmentEnteredTimestamp', 'treatmentid', 'treatmentstring']\n", + "2024-12-14 23:38:51.558 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, treatmentEnteredTimestamp, treatmentid, treatmentstring\n", + "2024-12-14 23:38:51.558 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:51.559 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", + "2024-12-14 23:38:51.583 
| INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.026468\n", + "2024-12-14 23:38:51.583 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/.[0-38290).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/.[0-38290).parquet_cache/locks/2024-12-14T23:38:51.556751.json\n", + "2024-12-14 23:38:51.587 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis.\n", + "2024-12-14 23:38:51.587 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet to determine row count.\n", + "2024-12-14 23:38:51.591 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.591 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:13.507 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dosage', 'drugivadmixture', 'drugname', 'drugordertimestamp', 'drugstarttimestamp', 'drugstoptimestamp', 'frequency', 'gtc', 'loadingdose', 'medicationid', 'patienthealthsystemstayid', 'prn', 'routeadmin']\n", - "2024-12-14 17:20:13.507 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dosage, drugivadmixture, drugname, drugordertimestamp, drugstarttimestamp, drugstoptimestamp, frequency, gtc, loadingdose, medicationid, 
patienthealthsystemstayid, prn, routeadmin\n", - "2024-12-14 17:20:13.508 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 75604 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet.\n", - "2024-12-14 17:20:13.508 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:13.508 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet row-chunk [0-75604) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet.\n", - "2024-12-14 17:20:13.510 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.510654. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:13.511 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet\n", - "2024-12-14 17:20:13.511 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:13.511 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.591 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['diagnosisEnteredTimestamp', 'diagnosispriority', 'diagnosisstring', 'icd9code', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:51.591 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, diagnosisEnteredTimestamp, 
diagnosispriority, diagnosisstring, icd9code, patienthealthsystemstayid\n", + "2024-12-14 23:38:51.592 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 24978 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet.\n", + "2024-12-14 23:38:51.593 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:51.593 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet row-chunk [0-24978) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet.\n", + "2024-12-14 23:38:51.596 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:51.596337. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:51.596 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet\n", + "2024-12-14 23:38:51.596 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.596 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:13.511 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dosage', 'drugivadmixture', 'drugname', 'drugordertimestamp', 'drugstarttimestamp', 'drugstoptimestamp', 'frequency', 'gtc', 'loadingdose', 'medicationid', 'patienthealthsystemstayid', 'prn', 'routeadmin']\n", - "2024-12-14 17:20:13.511 | DEBUG | 
MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dosage, drugivadmixture, drugname, drugordertimestamp, drugstarttimestamp, drugstoptimestamp, frequency, gtc, loadingdose, medicationid, patienthealthsystemstayid, prn, routeadmin\n", - "2024-12-14 17:20:13.511 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:13.511 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", - "2024-12-14 17:20:13.548 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.037873\n", - "2024-12-14 17:20:13.548 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/.[0-75604).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/.[0-75604).parquet_cache/locks/2024-12-14T17:20:13.510654.json\n", - "2024-12-14 17:20:13.551 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal.\n", - "2024-12-14 17:20:13.551 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet to determine row count.\n", - "2024-12-14 17:20:13.553 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:13.553 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.597 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['diagnosisEnteredTimestamp', 'diagnosispriority', 'diagnosisstring', 'icd9code', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:51.597 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, diagnosisEnteredTimestamp, diagnosispriority, diagnosisstring, icd9code, patienthealthsystemstayid\n", + "2024-12-14 23:38:51.597 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:51.597 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", + "2024-12-14 23:38:51.616 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.019687\n", + "2024-12-14 23:38:51.616 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/.[0-24978).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/.[0-24978).parquet_cache/locks/2024-12-14T23:38:51.596337.json\n", + "2024-12-14 23:38:51.621 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting.\n", + "2024-12-14 23:38:51.621 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet to determine row count.\n", + "2024-12-14 23:38:51.626 | INFO | 
MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.626 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:13.553 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanGoalEnteredTimestamp', 'cplgoalcategory', 'cplgoalstatus', 'cplgoalvalue', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:13.553 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanGoalEnteredTimestamp, cplgoalcategory, cplgoalstatus, cplgoalvalue, patienthealthsystemstayid\n", - "2024-12-14 17:20:13.554 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 3633 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet.\n", - "2024-12-14 17:20:13.554 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:13.554 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet row-chunk [0-3633) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet.\n", - "2024-12-14 17:20:13.557 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.557013. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:13.557 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet\n", - "2024-12-14 17:20:13.557 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:13.557 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.626 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'respChartEnteredTimestamp', 'respChartPerformedTimestamp', 'respchartid', 'respcharttypecat', 'respchartvalue', 'respchartvaluelabel']\n", + "2024-12-14 23:38:51.627 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, respChartEnteredTimestamp, respChartPerformedTimestamp, respchartid, respcharttypecat, respchartvalue, respchartvaluelabel\n", + "2024-12-14 23:38:51.631 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 176089 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet.\n", + "2024-12-14 23:38:51.632 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:51.632 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet row-chunk [0-176089) to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet.\n", + "2024-12-14 23:38:51.635 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:51.635455. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:51.635 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet\n", + "2024-12-14 23:38:51.635 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.636 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:13.557 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanGoalEnteredTimestamp', 'cplgoalcategory', 'cplgoalstatus', 'cplgoalvalue', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:13.557 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanGoalEnteredTimestamp, cplgoalcategory, cplgoalstatus, cplgoalvalue, patienthealthsystemstayid\n", - "2024-12-14 17:20:13.557 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:13.557 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", - "2024-12-14 17:20:13.560 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003061\n", - "2024-12-14 17:20:13.560 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/.[0-3633).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/.[0-3633).parquet_cache/locks/2024-12-14T17:20:13.557013.json\n", - "2024-12-14 17:20:13.562 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease.\n", - "2024-12-14 17:20:13.562 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet to determine row count.\n", - "2024-12-14 17:20:13.564 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:13.564 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.636 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'respChartEnteredTimestamp', 'respChartPerformedTimestamp', 'respchartid', 'respcharttypecat', 'respchartvalue', 'respchartvaluelabel']\n", + "2024-12-14 23:38:51.636 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, respChartEnteredTimestamp, respChartPerformedTimestamp, respchartid, respcharttypecat, respchartvalue, respchartvaluelabel\n", + "2024-12-14 23:38:51.636 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:51.636 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 23:38:51.742 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.107129\n", + "2024-12-14 23:38:51.742 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/.[0-176089).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/.[0-176089).parquet_cache/locks/2024-12-14T23:38:51.635455.json\n", + "2024-12-14 23:38:51.749 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient.\n", + "2024-12-14 23:38:51.749 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet to determine row count.\n", + "2024-12-14 23:38:51.752 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.752 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:13.564 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanInfectDiseaseEnteredTimestamp', 'infectdiseaseassessment', 'infectdiseasesite', 'patienthealthsystemstayid', 'responsetotherapy', 'treatment']\n", - "2024-12-14 17:20:13.564 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with 
columns: __row_idx__, carePlanInfectDiseaseEnteredTimestamp, infectdiseaseassessment, infectdiseasesite, patienthealthsystemstayid, responsetotherapy, treatment\n", - "2024-12-14 17:20:13.565 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 112 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet.\n", - "2024-12-14 17:20:13.565 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:13.565 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet row-chunk [0-112) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet.\n", - "2024-12-14 17:20:13.567 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.567600. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:13.567 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet\n", - "2024-12-14 17:20:13.567 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:13.567 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.752 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dateofbirth', 'ethnicity', 'gender', 'hospitaladmitsource', 'hospitaladmittimestamp', 'hospitaldischargelocation', 'hospitaldischargestatus', 'hospitaldischargetimestamp', 'hospitalid', 'hospitalnumbedscategory', 'hospitalregion', 'hospitalteachingstatus', 'patienthealthsystemstayid', 'patientunitstayid', 'uniquepid', 'unitadmissionheight', 'unitadmissionweight', 'unitadmitsource', 'unitadmittimestamp', 'unitdischargelocation', 'unitdischargestatus', 'unitdischargetimestamp', 'unitdischargeweight', 'unitstaytype', 'wardid']\n", + "2024-12-14 23:38:51.753 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dateofbirth, ethnicity, gender, hospitaladmitsource, hospitaladmittimestamp, hospitaldischargelocation, hospitaldischargestatus, hospitaldischargetimestamp, hospitalid, hospitalnumbedscategory, hospitalregion, hospitalteachingstatus, patienthealthsystemstayid, patientunitstayid, uniquepid, unitadmissionheight, unitadmissionweight, unitadmitsource, unitadmittimestamp, unitdischargelocation, unitdischargestatus, unitdischargetimestamp, unitdischargeweight, unitstaytype, wardid\n", + "2024-12-14 23:38:51.754 | INFO | 
MEDS_transforms.extract.shard_events:main:405 - Read 2520 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet.\n", + "2024-12-14 23:38:51.754 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:51.754 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet row-chunk [0-2520) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet.\n", + "2024-12-14 23:38:51.758 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:51.758024. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:51.758 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet\n", + "2024-12-14 23:38:51.758 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.758 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:13.568 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanInfectDiseaseEnteredTimestamp', 'infectdiseaseassessment', 'infectdiseasesite', 'patienthealthsystemstayid', 'responsetotherapy', 'treatment']\n", - "2024-12-14 17:20:13.568 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanInfectDiseaseEnteredTimestamp, infectdiseaseassessment, infectdiseasesite, patienthealthsystemstayid, 
responsetotherapy, treatment\n", - "2024-12-14 17:20:13.568 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:13.568 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", - "2024-12-14 17:20:13.569 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.001941\n", - "2024-12-14 17:20:13.569 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/.[0-112).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/.[0-112).parquet_cache/locks/2024-12-14T17:20:13.567600.json\n", - "2024-12-14 17:20:13.571 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic.\n", - "2024-12-14 17:20:13.571 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet to determine row count.\n", - "2024-12-14 17:20:13.574 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:13.574 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.758 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dateofbirth', 'ethnicity', 'gender', 
'hospitaladmitsource', 'hospitaladmittimestamp', 'hospitaldischargelocation', 'hospitaldischargestatus', 'hospitaldischargetimestamp', 'hospitalid', 'hospitalnumbedscategory', 'hospitalregion', 'hospitalteachingstatus', 'patienthealthsystemstayid', 'patientunitstayid', 'uniquepid', 'unitadmissionheight', 'unitadmissionweight', 'unitadmitsource', 'unitadmittimestamp', 'unitdischargelocation', 'unitdischargestatus', 'unitdischargetimestamp', 'unitdischargeweight', 'unitstaytype', 'wardid']\n", + "2024-12-14 23:38:51.759 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dateofbirth, ethnicity, gender, hospitaladmitsource, hospitaladmittimestamp, hospitaldischargelocation, hospitaldischargestatus, hospitaldischargetimestamp, hospitalid, hospitalnumbedscategory, hospitalregion, hospitalteachingstatus, patienthealthsystemstayid, patientunitstayid, uniquepid, unitadmissionheight, unitadmissionweight, unitadmitsource, unitadmittimestamp, unitdischargelocation, unitdischargestatus, unitdischargetimestamp, unitdischargeweight, unitstaytype, wardid\n", + "2024-12-14 23:38:51.759 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:51.759 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", + "2024-12-14 23:38:51.775 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.017470\n", + "2024-12-14 23:38:51.775 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/.[0-2520).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/.[0-2520).parquet_cache/locks/2024-12-14T23:38:51.758024.json\n", + "2024-12-14 23:38:51.779 | INFO | 
MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal.\n", + "2024-12-14 23:38:51.779 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet to determine row count.\n", + "2024-12-14 23:38:51.782 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.782 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:13.574 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cvp', 'etco2', 'heartrate', 'icp', 'observationEnteredTimestamp', 'padiastolic', 'pamean', 'pasystolic', 'patienthealthsystemstayid', 'respiration', 'sao2', 'st1', 'st2', 'st3', 'systemicdiastolic', 'systemicmean', 'systemicsystolic', 'temperature', 'vitalperiodicid']\n", - "2024-12-14 17:20:13.574 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cvp, etco2, heartrate, icp, observationEnteredTimestamp, padiastolic, pamean, pasystolic, patienthealthsystemstayid, respiration, sao2, st1, st2, st3, systemicdiastolic, systemicmean, systemicsystolic, temperature, vitalperiodicid\n", - "2024-12-14 17:20:13.576 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1634960 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet.\n", - "2024-12-14 17:20:13.576 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:13.576 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet row-chunk [0-1634960) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet.\n", - "2024-12-14 17:20:13.579 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.578964. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:13.579 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet\n", - "2024-12-14 17:20:13.579 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:13.579 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.782 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanGoalEnteredTimestamp', 'cplgoalcategory', 'cplgoalstatus', 'cplgoalvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:51.783 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanGoalEnteredTimestamp, cplgoalcategory, cplgoalstatus, cplgoalvalue, patienthealthsystemstayid\n", + "2024-12-14 23:38:51.784 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 3633 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet.\n", + "2024-12-14 23:38:51.784 | INFO | 
MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:51.784 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet row-chunk [0-3633) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet.\n", + "2024-12-14 23:38:51.787 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:51.787731. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:51.788 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet\n", + "2024-12-14 23:38:51.788 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.788 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGoal.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:13.579 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cvp', 'etco2', 'heartrate', 'icp', 'observationEnteredTimestamp', 'padiastolic', 'pamean', 'pasystolic', 'patienthealthsystemstayid', 'respiration', 'sao2', 'st1', 'st2', 'st3', 'systemicdiastolic', 'systemicmean', 'systemicsystolic', 'temperature', 'vitalperiodicid']\n", - "2024-12-14 17:20:13.579 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cvp, etco2, heartrate, icp, observationEnteredTimestamp, padiastolic, pamean, pasystolic, patienthealthsystemstayid, respiration, sao2, st1, st2, st3, systemicdiastolic, 
systemicmean, systemicsystolic, temperature, vitalperiodicid\n", - "2024-12-14 17:20:13.579 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:13.579 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:13.942 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.363615\n", - "2024-12-14 17:20:13.942 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/.[0-1634960).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/.[0-1634960).parquet_cache/locks/2024-12-14T17:20:13.578964.json\n", - "2024-12-14 17:20:13.945 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient.\n", - "2024-12-14 17:20:13.945 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet to determine row count.\n", - "2024-12-14 17:20:13.947 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:13.947 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.788 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanGoalEnteredTimestamp', 'cplgoalcategory', 
'cplgoalstatus', 'cplgoalvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:51.788 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanGoalEnteredTimestamp, cplgoalcategory, cplgoalstatus, cplgoalvalue, patienthealthsystemstayid\n", + "2024-12-14 23:38:51.788 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:51.788 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 23:38:51.794 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006389\n", + "2024-12-14 23:38:51.794 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/.[0-3633).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/.[0-3633).parquet_cache/locks/2024-12-14T23:38:51.787731.json\n", + "2024-12-14 23:38:51.801 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy.\n", + "2024-12-14 23:38:51.802 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet to determine row count.\n", + "2024-12-14 23:38:51.810 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.810 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet as 
Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:13.947 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dateofbirth', 'ethnicity', 'gender', 'hospitaladmitsource', 'hospitaladmittimestamp', 'hospitaldischargelocation', 'hospitaldischargestatus', 'hospitaldischargetimestamp', 'hospitalid', 'hospitalnumbedscategory', 'hospitalregion', 'hospitalteachingstatus', 'patienthealthsystemstayid', 'patientunitstayid', 'uniquepid', 'unitadmissionheight', 'unitadmissionweight', 'unitadmitsource', 'unitadmittimestamp', 'unitdischargelocation', 'unitdischargestatus', 'unitdischargetimestamp', 'unitdischargeweight', 'unitstaytype', 'wardid']\n", - "2024-12-14 17:20:13.948 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dateofbirth, ethnicity, gender, hospitaladmitsource, hospitaladmittimestamp, hospitaldischargelocation, hospitaldischargestatus, hospitaldischargetimestamp, hospitalid, hospitalnumbedscategory, hospitalregion, hospitalteachingstatus, patienthealthsystemstayid, patientunitstayid, uniquepid, unitadmissionheight, unitadmissionweight, unitadmitsource, unitadmittimestamp, unitdischargelocation, unitdischargestatus, unitdischargetimestamp, unitdischargeweight, unitstaytype, wardid\n", - "2024-12-14 17:20:13.948 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 2520 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet.\n", - "2024-12-14 17:20:13.948 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:13.948 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet row-chunk [0-2520) to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet.\n", - "2024-12-14 17:20:13.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.950962. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:13.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet\n", - "2024-12-14 17:20:13.951 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:13.951 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/patient.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.810 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['allergyEnteredTimestamp', 'allergyname', 'allergytype', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:51.810 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, allergyEnteredTimestamp, allergyname, allergytype, patienthealthsystemstayid\n", + "2024-12-14 23:38:51.811 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 2475 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet.\n", + "2024-12-14 23:38:51.812 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:51.812 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet row-chunk [0-2475) to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet.\n", + "2024-12-14 23:38:51.815 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:51.815505. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:51.815 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet\n", + "2024-12-14 23:38:51.815 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.816 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:13.951 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dateofbirth', 'ethnicity', 'gender', 'hospitaladmitsource', 'hospitaladmittimestamp', 'hospitaldischargelocation', 'hospitaldischargestatus', 'hospitaldischargetimestamp', 'hospitalid', 'hospitalnumbedscategory', 'hospitalregion', 'hospitalteachingstatus', 'patienthealthsystemstayid', 'patientunitstayid', 'uniquepid', 'unitadmissionheight', 'unitadmissionweight', 'unitadmitsource', 'unitadmittimestamp', 'unitdischargelocation', 'unitdischargestatus', 'unitdischargetimestamp', 'unitdischargeweight', 'unitstaytype', 'wardid']\n", - "2024-12-14 17:20:13.951 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dateofbirth, ethnicity, gender, hospitaladmitsource, hospitaladmittimestamp, hospitaldischargelocation, hospitaldischargestatus, hospitaldischargetimestamp, hospitalid, hospitalnumbedscategory, hospitalregion, hospitalteachingstatus, patienthealthsystemstayid, patientunitstayid, uniquepid, unitadmissionheight, 
unitadmissionweight, unitadmitsource, unitadmittimestamp, unitdischargelocation, unitdischargestatus, unitdischargetimestamp, unitdischargeweight, unitstaytype, wardid\n", - "2024-12-14 17:20:13.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:13.951 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", - "2024-12-14 17:20:13.961 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010891\n", - "2024-12-14 17:20:13.961 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/.[0-2520).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/.[0-2520).parquet_cache/locks/2024-12-14T17:20:13.950962.json\n", - "2024-12-14 17:20:13.964 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab.\n", - "2024-12-14 17:20:13.964 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet to determine row count.\n", - "2024-12-14 17:20:13.966 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:13.967 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.816 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting 
columns: ['allergyEnteredTimestamp', 'allergyname', 'allergytype', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:51.816 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, allergyEnteredTimestamp, allergyname, allergytype, patienthealthsystemstayid\n", + "2024-12-14 23:38:51.816 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:51.816 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", + "2024-12-14 23:38:51.820 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.004640\n", + "2024-12-14 23:38:51.820 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/.[0-2475).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/.[0-2475).parquet_cache/locks/2024-12-14T23:38:51.815505.json\n", + "2024-12-14 23:38:51.823 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease.\n", + "2024-12-14 23:38:51.823 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet to determine row count.\n", + "2024-12-14 23:38:51.826 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.827 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:13.967 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['labResultDrawnTimestamp', 'labmeasurenameinterface', 'labmeasurenamesystem', 'labname', 'labresult', 'labresulttext', 'labtypeid', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:13.967 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, labResultDrawnTimestamp, labmeasurenameinterface, labmeasurenamesystem, labname, labresult, labresulttext, labtypeid, patienthealthsystemstayid\n", - "2024-12-14 17:20:13.969 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 434660 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet.\n", - "2024-12-14 17:20:13.969 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:13.969 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet row-chunk [0-434660) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet.\n", - "2024-12-14 17:20:13.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:13.972482. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:13.972 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet\n", - "2024-12-14 17:20:13.972 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:13.972 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.827 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanInfectDiseaseEnteredTimestamp', 'infectdiseaseassessment', 'infectdiseasesite', 'patienthealthsystemstayid', 'responsetotherapy', 'treatment']\n", + "2024-12-14 23:38:51.827 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanInfectDiseaseEnteredTimestamp, infectdiseaseassessment, infectdiseasesite, patienthealthsystemstayid, responsetotherapy, treatment\n", + "2024-12-14 23:38:51.828 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 112 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet.\n", + "2024-12-14 23:38:51.828 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:51.828 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet row-chunk [0-112) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet.\n", + "2024-12-14 
23:38:51.831 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:51.831540. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:51.831 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet\n", + "2024-12-14 23:38:51.831 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.832 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanInfectiousDisease.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:13.972 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['labResultDrawnTimestamp', 'labmeasurenameinterface', 'labmeasurenamesystem', 'labname', 'labresult', 'labresulttext', 'labtypeid', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:13.973 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, labResultDrawnTimestamp, labmeasurenameinterface, labmeasurenamesystem, labname, labresult, labresulttext, labtypeid, patienthealthsystemstayid\n", - "2024-12-14 17:20:13.973 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:13.973 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", - "2024-12-14 17:20:14.058 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.085536\n", - "2024-12-14 17:20:14.058 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/.[0-434660).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/.[0-434660).parquet_cache/locks/2024-12-14T17:20:13.972482.json\n", - "2024-12-14 17:20:14.061 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment.\n", - "2024-12-14 17:20:14.061 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet to determine row count.\n", - "2024-12-14 17:20:14.063 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.063 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.832 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanInfectDiseaseEnteredTimestamp', 'infectdiseaseassessment', 'infectdiseasesite', 'patienthealthsystemstayid', 'responsetotherapy', 'treatment']\n", + "2024-12-14 23:38:51.832 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanInfectDiseaseEnteredTimestamp, infectdiseaseassessment, infectdiseasesite, patienthealthsystemstayid, responsetotherapy, treatment\n", + "2024-12-14 23:38:51.832 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:51.832 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 23:38:51.835 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003717\n", + "2024-12-14 23:38:51.835 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/.[0-112).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/.[0-112).parquet_cache/locks/2024-12-14T23:38:51.831540.json\n", + "2024-12-14 23:38:51.841 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment.\n", + "2024-12-14 23:38:51.841 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet to determine row count.\n", + "2024-12-14 23:38:51.846 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.846 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.063 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'treatmentEnteredTimestamp', 'treatmentid', 'treatmentstring']\n", - "2024-12-14 17:20:14.063 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, treatmentEnteredTimestamp, treatmentid, 
treatmentstring\n", - "2024-12-14 17:20:14.064 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 38290 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet.\n", - "2024-12-14 17:20:14.064 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:14.064 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet row-chunk [0-38290) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet.\n", - "2024-12-14 17:20:14.066 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.066637. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:14.066 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet\n", - "2024-12-14 17:20:14.067 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.067 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/treatment.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.846 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cellattribute', 'cellattributevalue', 'celllabel', 'nurseAssessEnteredTimestamp', 'nurseAssessPerformedTimestamp', 'nurseassessid', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:51.847 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cellattribute, 
cellattributevalue, celllabel, nurseAssessEnteredTimestamp, nurseAssessPerformedTimestamp, nurseassessid, patienthealthsystemstayid\n", + "2024-12-14 23:38:51.850 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 91589 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet.\n", + "2024-12-14 23:38:51.850 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:51.850 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet row-chunk [0-91589) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet.\n", + "2024-12-14 23:38:51.853 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:51.853273. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:51.853 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet\n", + "2024-12-14 23:38:51.853 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.853 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.067 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'treatmentEnteredTimestamp', 'treatmentid', 'treatmentstring']\n", - "2024-12-14 17:20:14.067 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, treatmentEnteredTimestamp, treatmentid, treatmentstring\n", - "2024-12-14 17:20:14.067 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:14.067 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", - "2024-12-14 17:20:14.076 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009360\n", - "2024-12-14 17:20:14.076 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/.[0-38290).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/.[0-38290).parquet_cache/locks/2024-12-14T17:20:14.066637.json\n", - "2024-12-14 17:20:14.078 | INFO | 
MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment.\n", - "2024-12-14 17:20:14.079 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet to determine row count.\n", - "2024-12-14 17:20:14.080 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.081 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.853 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cellattribute', 'cellattributevalue', 'celllabel', 'nurseAssessEnteredTimestamp', 'nurseAssessPerformedTimestamp', 'nurseassessid', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:51.854 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cellattribute, cellattributevalue, celllabel, nurseAssessEnteredTimestamp, nurseAssessPerformedTimestamp, nurseassessid, patienthealthsystemstayid\n", + "2024-12-14 23:38:51.854 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:51.854 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 23:38:51.922 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.069599\n", + "2024-12-14 23:38:51.923 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving 
cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/.[0-91589).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/.[0-91589).parquet_cache/locks/2024-12-14T23:38:51.853273.json\n", + "2024-12-14 23:38:51.928 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare.\n", + "2024-12-14 23:38:51.928 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet to determine row count.\n", + "2024-12-14 23:38:51.931 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.931 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.081 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cellattribute', 'cellattributevalue', 'celllabel', 'nurseAssessEnteredTimestamp', 'nurseAssessPerformedTimestamp', 'nurseassessid', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:14.081 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cellattribute, cellattributevalue, celllabel, nurseAssessEnteredTimestamp, nurseAssessPerformedTimestamp, nurseassessid, patienthealthsystemstayid\n", - "2024-12-14 17:20:14.081 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 91589 rows from 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet.\n", - "2024-12-14 17:20:14.081 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:14.081 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet row-chunk [0-91589) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet.\n", - "2024-12-14 17:20:14.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.084291. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:14.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet\n", - "2024-12-14 17:20:14.084 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.084 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseAssessment.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.931 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['airwayposition', 'airwaysize', 'airwaytype', 'cpaplimit', 'cuffpressure', 'hiexhmvlimit', 'highironoxlimit', 'hipeakpreslimit', 'hirespratelimit', 'lowexhmvlimit', 'lowexhtvlimit', 'lowironoxlimit', 'lowpeakpreslimit', 'lowrespratelimit', 'meanairwaypreslimit', 'patienthealthsystemstayid', 'peeplimit', 'respCareStatusEnteredTimestamp', 'respcareid', 'setapneafio2', 'setapneaie', 'setapneainsptime', 
'setapneainterval', 'setapneaippeephigh', 'setapneapeakflow', 'setapnearr', 'setapneatv', 'sighpreslimit', 'ventEndTimestamp', 'ventStartTimestamp']\n", + "2024-12-14 23:38:51.932 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, airwayposition, airwaysize, airwaytype, cpaplimit, cuffpressure, hiexhmvlimit, highironoxlimit, hipeakpreslimit, hirespratelimit, lowexhmvlimit, lowexhtvlimit, lowironoxlimit, lowpeakpreslimit, lowrespratelimit, meanairwaypreslimit, patienthealthsystemstayid, peeplimit, respCareStatusEnteredTimestamp, respcareid, setapneafio2, setapneaie, setapneainsptime, setapneainterval, setapneaippeephigh, setapneapeakflow, setapnearr, setapneatv, sighpreslimit, ventEndTimestamp, ventStartTimestamp\n", + "2024-12-14 23:38:51.933 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 5436 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet.\n", + "2024-12-14 23:38:51.933 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:51.933 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet row-chunk [0-5436) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet.\n", + "2024-12-14 23:38:51.937 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:51.936942. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:51.937 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet\n", + "2024-12-14 23:38:51.937 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.937 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.084 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cellattribute', 'cellattributevalue', 'celllabel', 'nurseAssessEnteredTimestamp', 'nurseAssessPerformedTimestamp', 'nurseassessid', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:14.084 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cellattribute, cellattributevalue, celllabel, nurseAssessEnteredTimestamp, nurseAssessPerformedTimestamp, nurseassessid, patienthealthsystemstayid\n", - "2024-12-14 17:20:14.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:14.085 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", - "2024-12-14 17:20:14.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.021893\n", - "2024-12-14 17:20:14.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/.[0-91589).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/.[0-91589).parquet_cache/locks/2024-12-14T17:20:14.084291.json\n", - "2024-12-14 17:20:14.108 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam.\n", - "2024-12-14 17:20:14.108 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet to determine row count.\n", - "2024-12-14 17:20:14.110 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.110 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.937 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['airwayposition', 'airwaysize', 'airwaytype', 'cpaplimit', 'cuffpressure', 'hiexhmvlimit', 'highironoxlimit', 'hipeakpreslimit', 'hirespratelimit', 'lowexhmvlimit', 'lowexhtvlimit', 'lowironoxlimit', 'lowpeakpreslimit', 'lowrespratelimit', 'meanairwaypreslimit', 'patienthealthsystemstayid', 'peeplimit', 'respCareStatusEnteredTimestamp', 'respcareid', 'setapneafio2', 'setapneaie', 'setapneainsptime', 'setapneainterval', 'setapneaippeephigh', 'setapneapeakflow', 'setapnearr', 'setapneatv', 'sighpreslimit', 'ventEndTimestamp', 'ventStartTimestamp']\n", + "2024-12-14 23:38:51.938 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, airwayposition, airwaysize, airwaytype, cpaplimit, cuffpressure, hiexhmvlimit, highironoxlimit, 
hipeakpreslimit, hirespratelimit, lowexhmvlimit, lowexhtvlimit, lowironoxlimit, lowpeakpreslimit, lowrespratelimit, meanairwaypreslimit, patienthealthsystemstayid, peeplimit, respCareStatusEnteredTimestamp, respcareid, setapneafio2, setapneaie, setapneainsptime, setapneainterval, setapneaippeephigh, setapneapeakflow, setapnearr, setapneatv, sighpreslimit, ventEndTimestamp, ventStartTimestamp\n", + "2024-12-14 23:38:51.938 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:51.938 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 23:38:51.960 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.023307\n", + "2024-12-14 23:38:51.960 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/.[0-5436).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/.[0-5436).parquet_cache/locks/2024-12-14T23:38:51.936942.json\n", + "2024-12-14 23:38:51.968 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory.\n", + "2024-12-14 23:38:51.968 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet to determine row count.\n", + "2024-12-14 23:38:51.975 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.975 | DEBUG | 
MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.111 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'physicalExamEnteredTimestamp', 'physicalexamid', 'physicalexampath', 'physicalexamtext', 'physicalexamvalue']\n", - "2024-12-14 17:20:14.111 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, physicalExamEnteredTimestamp, physicalexamid, physicalexampath, physicalexamtext, physicalexamvalue\n", - "2024-12-14 17:20:14.111 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 84058 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet.\n", - "2024-12-14 17:20:14.111 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:14.111 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet row-chunk [0-84058) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet.\n", - "2024-12-14 17:20:14.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.113975. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:14.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet\n", - "2024-12-14 17:20:14.114 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.114 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.975 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['pastHistoryEnteredTimestamp', 'pastHistoryTakenTimestamp', 'pasthistoryid', 'pasthistorynotetype', 'pasthistorypath', 'pasthistoryvalue', 'pasthistoryvaluetext', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:51.976 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, pastHistoryEnteredTimestamp, pastHistoryTakenTimestamp, pasthistoryid, pasthistorynotetype, pasthistorypath, pasthistoryvalue, pasthistoryvaluetext, patienthealthsystemstayid\n", + "2024-12-14 23:38:51.977 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 12109 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet.\n", + "2024-12-14 23:38:51.978 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:51.978 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet row-chunk [0-12109) to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet.\n", + "2024-12-14 23:38:51.985 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:51.985459. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:51.986 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet\n", + "2024-12-14 23:38:51.986 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:51.986 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.114 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'physicalExamEnteredTimestamp', 'physicalexamid', 'physicalexampath', 'physicalexamtext', 'physicalexamvalue']\n", - "2024-12-14 17:20:14.114 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, physicalExamEnteredTimestamp, physicalexamid, physicalexampath, physicalexamtext, physicalexamvalue\n", - "2024-12-14 17:20:14.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:14.114 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", - "2024-12-14 17:20:14.139 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.025145\n", - "2024-12-14 17:20:14.139 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/.[0-84058).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/.[0-84058).parquet_cache/locks/2024-12-14T17:20:14.113975.json\n", - "2024-12-14 17:20:14.141 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx.\n", - "2024-12-14 17:20:14.141 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet to determine row count.\n", - "2024-12-14 17:20:14.143 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.144 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:51.986 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['pastHistoryEnteredTimestamp', 'pastHistoryTakenTimestamp', 'pasthistoryid', 'pasthistorynotetype', 'pasthistorypath', 'pasthistoryvalue', 'pasthistoryvaluetext', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:51.987 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, pastHistoryEnteredTimestamp, pastHistoryTakenTimestamp, pasthistoryid, pasthistorynotetype, pasthistorypath, pasthistoryvalue, pasthistoryvaluetext, patienthealthsystemstayid\n", + "2024-12-14 23:38:51.987 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:51.987 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", + "2024-12-14 23:38:52.008 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.022533\n", + "2024-12-14 23:38:52.008 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/.[0-12109).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/.[0-12109).parquet_cache/locks/2024-12-14T23:38:51.985459.json\n", + "2024-12-14 23:38:52.013 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral.\n", + "2024-12-14 23:38:52.013 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet to determine row count.\n", + "2024-12-14 23:38:52.017 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:52.017 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.144 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admissiondxid', 'admitDxEnteredTimestamp', 'admitdxname', 'patienthealthsystemstayid', 'patientunitstayid']\n", - "2024-12-14 17:20:14.144 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, admissiondxid, 
admitDxEnteredTimestamp, admitdxname, patienthealthsystemstayid, patientunitstayid\n", - "2024-12-14 17:20:14.144 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 7578 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet.\n", - "2024-12-14 17:20:14.144 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:14.144 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet row-chunk [0-7578) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet.\n", - "2024-12-14 17:20:14.146 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.146860. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:14.147 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet\n", - "2024-12-14 17:20:14.147 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.147 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/admissionDx.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:52.017 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanGeneralItemEnteredTimestamp', 'cplgroup', 'cplitemvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:52.018 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, 
carePlanGeneralItemEnteredTimestamp, cplgroup, cplitemvalue, patienthealthsystemstayid\n", + "2024-12-14 23:38:52.019 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 33148 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet.\n", + "2024-12-14 23:38:52.019 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:52.019 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet row-chunk [0-33148) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet.\n", + "2024-12-14 23:38:52.023 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:52.023034. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:52.023 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet\n", + "2024-12-14 23:38:52.023 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:52.023 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.147 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['admissiondxid', 'admitDxEnteredTimestamp', 'admitdxname', 'patienthealthsystemstayid', 'patientunitstayid']\n", - "2024-12-14 17:20:14.147 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: 
__row_idx__, admissiondxid, admitDxEnteredTimestamp, admitdxname, patienthealthsystemstayid, patientunitstayid\n", - "2024-12-14 17:20:14.147 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:14.147 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", - "2024-12-14 17:20:14.150 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.003467\n", - "2024-12-14 17:20:14.150 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/.[0-7578).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/.[0-7578).parquet_cache/locks/2024-12-14T17:20:14.146860.json\n", - "2024-12-14 17:20:14.152 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis.\n", - "2024-12-14 17:20:14.152 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet to determine row count.\n", - "2024-12-14 17:20:14.155 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.155 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:52.023 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: 
['carePlanGeneralItemEnteredTimestamp', 'cplgroup', 'cplitemvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:52.024 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanGeneralItemEnteredTimestamp, cplgroup, cplitemvalue, patienthealthsystemstayid\n", + "2024-12-14 23:38:52.024 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:52.024 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 23:38:52.035 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012887\n", + "2024-12-14 23:38:52.036 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/.[0-33148).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/.[0-33148).parquet_cache/locks/2024-12-14T23:38:52.023034.json\n", + "2024-12-14 23:38:52.039 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting.\n", + "2024-12-14 23:38:52.039 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet to determine row count.\n", + "2024-12-14 23:38:52.042 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:52.042 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.155 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['diagnosisEnteredTimestamp', 'diagnosispriority', 'diagnosisstring', 'icd9code', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:14.155 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, diagnosisEnteredTimestamp, diagnosispriority, diagnosisstring, icd9code, patienthealthsystemstayid\n", - "2024-12-14 17:20:14.155 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 24978 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet.\n", - "2024-12-14 17:20:14.155 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:14.155 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet row-chunk [0-24978) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet.\n", - "2024-12-14 17:20:14.158 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.158055. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:14.158 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet\n", - "2024-12-14 17:20:14.158 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.158 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/diagnosis.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:52.043 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['nursingChartEnteredTimestamp', 'nursingChartPerformedTimestamp', 'nursingchartcelltypecat', 'nursingchartcelltypevallabel', 'nursingchartcelltypevalname', 'nursingchartid', 'nursingchartvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:52.043 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, nursingChartEnteredTimestamp, nursingChartPerformedTimestamp, nursingchartcelltypecat, nursingchartcelltypevallabel, nursingchartcelltypevalname, nursingchartid, nursingchartvalue, patienthealthsystemstayid\n", + "2024-12-14 23:38:52.078 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1477163 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet.\n", + "2024-12-14 23:38:52.078 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:52.078 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet row-chunk [0-1477163) to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet.\n", + "2024-12-14 23:38:52.086 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:52.086061. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:52.086 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet\n", + "2024-12-14 23:38:52.086 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:52.087 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCharting.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.158 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['diagnosisEnteredTimestamp', 'diagnosispriority', 'diagnosisstring', 'icd9code', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:14.158 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, diagnosisEnteredTimestamp, diagnosispriority, diagnosisstring, icd9code, patienthealthsystemstayid\n", - "2024-12-14 17:20:14.158 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:14.158 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", - "2024-12-14 17:20:14.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006441\n", - "2024-12-14 17:20:14.164 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/.[0-24978).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/.[0-24978).parquet_cache/locks/2024-12-14T17:20:14.158055.json\n", - "2024-12-14 17:20:14.166 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting.\n", - "2024-12-14 17:20:14.167 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet to determine row count.\n", - "2024-12-14 17:20:14.169 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.169 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:52.087 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['nursingChartEnteredTimestamp', 'nursingChartPerformedTimestamp', 'nursingchartcelltypecat', 'nursingchartcelltypevallabel', 'nursingchartcelltypevalname', 'nursingchartid', 'nursingchartvalue', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:52.087 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, nursingChartEnteredTimestamp, nursingChartPerformedTimestamp, nursingchartcelltypecat, nursingchartcelltypevallabel, nursingchartcelltypevalname, nursingchartid, nursingchartvalue, patienthealthsystemstayid\n", + "2024-12-14 23:38:52.087 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:52.088 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 23:38:52.798 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.712396\n", + "2024-12-14 23:38:52.798 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/.[0-1477163).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/.[0-1477163).parquet_cache/locks/2024-12-14T23:38:52.086061.json\n", + "2024-12-14 23:38:52.805 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare.\n", + "2024-12-14 23:38:52.805 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet to determine row count.\n", + "2024-12-14 23:38:52.809 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:52.809 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.169 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'respChartEnteredTimestamp', 'respChartPerformedTimestamp', 'respchartid', 'respcharttypecat', 'respchartvalue', 'respchartvaluelabel']\n", - 
"2024-12-14 17:20:14.169 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, respChartEnteredTimestamp, respChartPerformedTimestamp, respchartid, respcharttypecat, respchartvalue, respchartvaluelabel\n", - "2024-12-14 17:20:14.170 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 176089 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet.\n", - "2024-12-14 17:20:14.170 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:14.170 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet row-chunk [0-176089) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet.\n", - "2024-12-14 17:20:14.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.172792. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:14.173 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet\n", - "2024-12-14 17:20:14.173 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.173 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCharting.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:52.809 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cellattribute', 'cellattributevalue', 'celllabel', 'nurseCareEnteredTimestamp', 'nurseCarePerformedTimestamp', 'nursecareid', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:52.810 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cellattribute, cellattributevalue, celllabel, nurseCareEnteredTimestamp, nurseCarePerformedTimestamp, nursecareid, patienthealthsystemstayid\n", + "2024-12-14 23:38:52.811 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 42080 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet.\n", + "2024-12-14 23:38:52.811 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:52.811 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet row-chunk [0-42080) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet.\n", + "2024-12-14 23:38:52.814 | INFO 
| MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:52.814747. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:52.815 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet\n", + "2024-12-14 23:38:52.815 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:52.815 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.173 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'respChartEnteredTimestamp', 'respChartPerformedTimestamp', 'respchartid', 'respcharttypecat', 'respchartvalue', 'respchartvaluelabel']\n", - "2024-12-14 17:20:14.173 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, respChartEnteredTimestamp, respChartPerformedTimestamp, respchartid, respcharttypecat, respchartvalue, respchartvaluelabel\n", - "2024-12-14 17:20:14.173 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:14.173 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", - "2024-12-14 17:20:14.217 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.044580\n", - "2024-12-14 17:20:14.217 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/.[0-176089).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/.[0-176089).parquet_cache/locks/2024-12-14T17:20:14.172792.json\n", - "2024-12-14 17:20:14.220 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare.\n", - "2024-12-14 17:20:14.220 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet to determine row count.\n", - "2024-12-14 17:20:14.222 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.222 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:52.815 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cellattribute', 'cellattributevalue', 'celllabel', 'nurseCareEnteredTimestamp', 'nurseCarePerformedTimestamp', 'nursecareid', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:52.815 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cellattribute, cellattributevalue, celllabel, nurseCareEnteredTimestamp, nurseCarePerformedTimestamp, nursecareid, patienthealthsystemstayid\n", + "2024-12-14 23:38:52.815 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:52.816 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", + "2024-12-14 23:38:52.846 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.032092\n", + "2024-12-14 23:38:52.847 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/.[0-42080).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/.[0-42080).parquet_cache/locks/2024-12-14T23:38:52.814747.json\n", + "2024-12-14 23:38:52.851 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam.\n", + "2024-12-14 23:38:52.851 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet to determine row count.\n", + "2024-12-14 23:38:52.856 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:52.856 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.222 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cellattribute', 'cellattributevalue', 'celllabel', 'nurseCareEnteredTimestamp', 'nurseCarePerformedTimestamp', 'nursecareid', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:14.222 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cellattribute, cellattributevalue, celllabel, 
nurseCareEnteredTimestamp, nurseCarePerformedTimestamp, nursecareid, patienthealthsystemstayid\n", - "2024-12-14 17:20:14.223 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 42080 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet.\n", - "2024-12-14 17:20:14.223 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:14.223 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet row-chunk [0-42080) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet.\n", - "2024-12-14 17:20:14.225 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.225610. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:14.225 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet\n", - "2024-12-14 17:20:14.225 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.226 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/nurseCare.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:52.856 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'physicalExamEnteredTimestamp', 'physicalexamid', 'physicalexampath', 'physicalexamtext', 'physicalexamvalue']\n", + "2024-12-14 23:38:52.857 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - 
Returning df with columns: __row_idx__, patienthealthsystemstayid, physicalExamEnteredTimestamp, physicalexamid, physicalexampath, physicalexamtext, physicalexamvalue\n", + "2024-12-14 23:38:52.861 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 84058 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet.\n", + "2024-12-14 23:38:52.861 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:52.861 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet row-chunk [0-84058) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet.\n", + "2024-12-14 23:38:52.867 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:52.867521. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:52.867 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet\n", + "2024-12-14 23:38:52.868 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:52.868 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/physicalExam.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.226 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cellattribute', 'cellattributevalue', 'celllabel', 'nurseCareEnteredTimestamp', 'nurseCarePerformedTimestamp', 'nursecareid', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:14.226 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cellattribute, cellattributevalue, celllabel, nurseCareEnteredTimestamp, nurseCarePerformedTimestamp, nursecareid, patienthealthsystemstayid\n", - "2024-12-14 17:20:14.226 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:14.226 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", - "2024-12-14 17:20:14.237 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012115\n", - "2024-12-14 17:20:14.237 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/.[0-42080).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/.[0-42080).parquet_cache/locks/2024-12-14T17:20:14.225610.json\n", - "2024-12-14 17:20:14.240 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral.\n", - "2024-12-14 17:20:14.240 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet to determine row count.\n", - "2024-12-14 17:20:14.242 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.242 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:52.868 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['patienthealthsystemstayid', 'physicalExamEnteredTimestamp', 'physicalexamid', 'physicalexampath', 'physicalexamtext', 'physicalexamvalue']\n", + "2024-12-14 23:38:52.868 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, patienthealthsystemstayid, physicalExamEnteredTimestamp, physicalexamid, physicalexampath, physicalexamtext, physicalexamvalue\n", + "2024-12-14 23:38:52.868 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:52.868 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", + "2024-12-14 23:38:52.915 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.047932\n", + "2024-12-14 23:38:52.915 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/.[0-84058).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/.[0-84058).parquet_cache/locks/2024-12-14T23:38:52.867521.json\n", + "2024-12-14 23:38:52.919 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication.\n", + "2024-12-14 23:38:52.919 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet to determine row count.\n", + "2024-12-14 23:38:52.922 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:52.923 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.242 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanGeneralItemEnteredTimestamp', 'cplgroup', 'cplitemvalue', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:14.242 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanGeneralItemEnteredTimestamp, cplgroup, cplitemvalue, patienthealthsystemstayid\n", - "2024-12-14 17:20:14.243 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 33148 rows from 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet.\n", - "2024-12-14 17:20:14.243 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:14.243 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet row-chunk [0-33148) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet.\n", - "2024-12-14 17:20:14.246 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.245975. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:14.246 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet\n", - "2024-12-14 17:20:14.246 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanGeneral.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:52.923 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['dosage', 'drugivadmixture', 'drugname', 'drugordertimestamp', 'drugstarttimestamp', 'drugstoptimestamp', 'frequency', 'gtc', 'loadingdose', 'medicationid', 'patienthealthsystemstayid', 'prn', 'routeadmin']\n", + "2024-12-14 23:38:52.923 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dosage, drugivadmixture, drugname, drugordertimestamp, 
drugstarttimestamp, drugstoptimestamp, frequency, gtc, loadingdose, medicationid, patienthealthsystemstayid, prn, routeadmin\n", + "2024-12-14 23:38:52.927 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 75604 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet.\n", + "2024-12-14 23:38:52.927 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:52.927 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet row-chunk [0-75604) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet.\n", + "2024-12-14 23:38:52.931 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:52.931042. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:52.931 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet\n", + "2024-12-14 23:38:52.931 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:52.931 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/medication.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanGeneralItemEnteredTimestamp', 'cplgroup', 'cplitemvalue', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:14.246 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: 
__row_idx__, carePlanGeneralItemEnteredTimestamp, cplgroup, cplitemvalue, patienthealthsystemstayid\n", - "2024-12-14 17:20:14.246 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:14.246 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", - "2024-12-14 17:20:14.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006840\n", - "2024-12-14 17:20:14.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/.[0-33148).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/.[0-33148).parquet_cache/locks/2024-12-14T17:20:14.245975.json\n", - "2024-12-14 17:20:14.255 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory.\n", - "2024-12-14 17:20:14.255 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet to determine row count.\n", - "2024-12-14 17:20:14.257 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.257 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:52.931 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: 
['dosage', 'drugivadmixture', 'drugname', 'drugordertimestamp', 'drugstarttimestamp', 'drugstoptimestamp', 'frequency', 'gtc', 'loadingdose', 'medicationid', 'patienthealthsystemstayid', 'prn', 'routeadmin']\n", + "2024-12-14 23:38:52.932 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, dosage, drugivadmixture, drugname, drugordertimestamp, drugstarttimestamp, drugstoptimestamp, frequency, gtc, loadingdose, medicationid, patienthealthsystemstayid, prn, routeadmin\n", + "2024-12-14 23:38:52.932 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:52.932 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", + "2024-12-14 23:38:53.004 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.072941\n", + "2024-12-14 23:38:53.004 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/.[0-75604).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/.[0-75604).parquet_cache/locks/2024-12-14T23:38:52.931042.json\n", + "2024-12-14 23:38:53.007 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL.\n", + "2024-12-14 23:38:53.008 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet to determine row count.\n", + "2024-12-14 23:38:53.011 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring 
infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:53.011 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.257 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['pastHistoryEnteredTimestamp', 'pastHistoryTakenTimestamp', 'pasthistoryid', 'pasthistorynotetype', 'pasthistorypath', 'pasthistoryvalue', 'pasthistoryvaluetext', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:14.258 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, pastHistoryEnteredTimestamp, pastHistoryTakenTimestamp, pasthistoryid, pasthistorynotetype, pasthistorypath, pasthistoryvalue, pasthistoryvaluetext, patienthealthsystemstayid\n", - "2024-12-14 17:20:14.258 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 12109 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet.\n", - "2024-12-14 17:20:14.258 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:14.258 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet row-chunk [0-12109) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet.\n", - "2024-12-14 17:20:14.260 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.260683. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:14.260 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet\n", - "2024-12-14 17:20:14.261 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.261 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/pastHistory.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:53.011 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanEolDiscussionOccurredTimestamp', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:53.011 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanEolDiscussionOccurredTimestamp, patienthealthsystemstayid\n", + "2024-12-14 23:38:53.012 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 15 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet.\n", + "2024-12-14 23:38:53.012 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:53.012 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet row-chunk [0-15) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet.\n", + "2024-12-14 23:38:53.016 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:53.016327. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:53.016 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet\n", + "2024-12-14 23:38:53.016 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:53.016 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/carePlanEOL.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.261 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['pastHistoryEnteredTimestamp', 'pastHistoryTakenTimestamp', 'pasthistoryid', 'pasthistorynotetype', 'pasthistorypath', 'pasthistoryvalue', 'pasthistoryvaluetext', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:14.261 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, pastHistoryEnteredTimestamp, pastHistoryTakenTimestamp, pasthistoryid, pasthistorynotetype, pasthistorypath, pasthistoryvalue, pasthistoryvaluetext, patienthealthsystemstayid\n", - "2024-12-14 17:20:14.261 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:14.261 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", - "2024-12-14 17:20:14.268 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007372\n", - "2024-12-14 17:20:14.268 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/.[0-12109).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/.[0-12109).parquet_cache/locks/2024-12-14T17:20:14.260683.json\n", - "2024-12-14 17:20:14.270 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic.\n", - "2024-12-14 17:20:14.270 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet to determine row count.\n", - "2024-12-14 17:20:14.273 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.273 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:53.017 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['carePlanEolDiscussionOccurredTimestamp', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:53.017 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, carePlanEolDiscussionOccurredTimestamp, patienthealthsystemstayid\n", + "2024-12-14 23:38:53.017 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:53.017 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", + "2024-12-14 23:38:53.019 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.002913\n", + "2024-12-14 23:38:53.019 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/.[0-15).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/.[0-15).parquet_cache/locks/2024-12-14T23:38:53.016327.json\n", + "2024-12-14 23:38:53.024 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic.\n", + "2024-12-14 23:38:53.024 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet to determine row count.\n", + "2024-12-14 23:38:53.029 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:53.029 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.273 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cardiacinput', 'cardiacoutput', 'noninvasivediastolic', 'noninvasivemean', 'noninvasivesystolic', 'observationEnteredTimestamp', 'paop', 'patienthealthsystemstayid', 'pvr', 'pvri', 'svr', 'svri', 'vitalaperiodicid']\n", - "2024-12-14 17:20:14.273 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cardiacinput, cardiacoutput, noninvasivediastolic, noninvasivemean, noninvasivesystolic, observationEnteredTimestamp, paop, patienthealthsystemstayid, pvr, pvri, svr, svri, vitalaperiodicid\n", - "2024-12-14 17:20:14.274 | INFO | 
MEDS_transforms.extract.shard_events:main:405 - Read 274088 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet.\n", - "2024-12-14 17:20:14.274 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:14.274 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet row-chunk [0-274088) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet.\n", - "2024-12-14 17:20:14.276 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.276629. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:14.276 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet\n", - "2024-12-14 17:20:14.276 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.277 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:53.029 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cvp', 'etco2', 'heartrate', 'icp', 'observationEnteredTimestamp', 'padiastolic', 'pamean', 'pasystolic', 'patienthealthsystemstayid', 'respiration', 'sao2', 'st1', 'st2', 'st3', 'systemicdiastolic', 'systemicmean', 'systemicsystolic', 'temperature', 'vitalperiodicid']\n", + "2024-12-14 23:38:53.030 | DEBUG | 
MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cvp, etco2, heartrate, icp, observationEnteredTimestamp, padiastolic, pamean, pasystolic, patienthealthsystemstayid, respiration, sao2, st1, st2, st3, systemicdiastolic, systemicmean, systemicsystolic, temperature, vitalperiodicid\n", + "2024-12-14 23:38:53.052 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 1634960 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet.\n", + "2024-12-14 23:38:53.052 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:53.052 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet row-chunk [0-1634960) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet.\n", + "2024-12-14 23:38:53.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:53.057447. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:53.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet\n", + "2024-12-14 23:38:53.058 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:53.058 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalPeriodic.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.277 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cardiacinput', 'cardiacoutput', 'noninvasivediastolic', 'noninvasivemean', 'noninvasivesystolic', 'observationEnteredTimestamp', 'paop', 'patienthealthsystemstayid', 'pvr', 'pvri', 'svr', 'svri', 'vitalaperiodicid']\n", - "2024-12-14 17:20:14.277 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cardiacinput, cardiacoutput, noninvasivediastolic, noninvasivemean, noninvasivesystolic, observationEnteredTimestamp, paop, patienthealthsystemstayid, pvr, pvri, svr, svri, vitalaperiodicid\n", - "2024-12-14 17:20:14.277 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:14.277 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", - "2024-12-14 17:20:14.323 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.046469\n", - "2024-12-14 17:20:14.323 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/.[0-274088).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/.[0-274088).parquet_cache/locks/2024-12-14T17:20:14.276629.json\n", - "2024-12-14 17:20:14.326 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy.\n", - "2024-12-14 17:20:14.326 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet to determine row count.\n", - "2024-12-14 17:20:14.328 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.328 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:53.058 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cvp', 'etco2', 'heartrate', 'icp', 'observationEnteredTimestamp', 'padiastolic', 'pamean', 'pasystolic', 'patienthealthsystemstayid', 'respiration', 'sao2', 'st1', 'st2', 'st3', 'systemicdiastolic', 'systemicmean', 'systemicsystolic', 'temperature', 'vitalperiodicid']\n", + "2024-12-14 23:38:53.058 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cvp, etco2, heartrate, icp, observationEnteredTimestamp, padiastolic, pamean, pasystolic, patienthealthsystemstayid, respiration, sao2, st1, st2, st3, systemicdiastolic, systemicmean, systemicsystolic, temperature, vitalperiodicid\n", + "2024-12-14 23:38:53.058 | INFO 
| MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:53.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:38:53.741 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.683798\n", + "2024-12-14 23:38:53.741 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/.[0-1634960).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/.[0-1634960).parquet_cache/locks/2024-12-14T23:38:53.057447.json\n", + "2024-12-14 23:38:53.745 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic.\n", + "2024-12-14 23:38:53.745 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet to determine row count.\n", + "2024-12-14 23:38:53.748 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:53.749 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.328 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['allergyEnteredTimestamp', 'allergyname', 'allergytype', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:14.328 | DEBUG | 
MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, allergyEnteredTimestamp, allergyname, allergytype, patienthealthsystemstayid\n", - "2024-12-14 17:20:14.329 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 2475 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet.\n", - "2024-12-14 17:20:14.329 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:14.329 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet row-chunk [0-2475) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet.\n", - "2024-12-14 17:20:14.331 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.331616. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:14.331 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet\n", - "2024-12-14 17:20:14.331 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.332 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/allergy.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:53.749 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cardiacinput', 'cardiacoutput', 'noninvasivediastolic', 'noninvasivemean', 'noninvasivesystolic', 'observationEnteredTimestamp', 'paop', 'patienthealthsystemstayid', 'pvr', 'pvri', 'svr', 'svri', 'vitalaperiodicid']\n", + "2024-12-14 23:38:53.749 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cardiacinput, cardiacoutput, noninvasivediastolic, noninvasivemean, noninvasivesystolic, observationEnteredTimestamp, paop, patienthealthsystemstayid, pvr, pvri, svr, svri, vitalaperiodicid\n", + "2024-12-14 23:38:53.751 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 274088 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet.\n", + "2024-12-14 23:38:53.752 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:53.752 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet row-chunk [0-274088) to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet.\n", + "2024-12-14 23:38:53.755 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:53.755524. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:53.755 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet\n", + "2024-12-14 23:38:53.756 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:53.756 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/vitalAperiodic.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.332 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['allergyEnteredTimestamp', 'allergyname', 'allergytype', 'patienthealthsystemstayid']\n", - "2024-12-14 17:20:14.332 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, allergyEnteredTimestamp, allergyname, allergytype, patienthealthsystemstayid\n", - "2024-12-14 17:20:14.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:14.332 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", - "2024-12-14 17:20:14.334 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.002488\n", - "2024-12-14 17:20:14.334 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/.[0-2475).parquet_cache, 
but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/.[0-2475).parquet_cache/locks/2024-12-14T17:20:14.331616.json\n", - "2024-12-14 17:20:14.336 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare.\n", - "2024-12-14 17:20:14.336 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet to determine row count.\n", - "2024-12-14 17:20:14.338 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.338 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:53.756 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['cardiacinput', 'cardiacoutput', 'noninvasivediastolic', 'noninvasivemean', 'noninvasivesystolic', 'observationEnteredTimestamp', 'paop', 'patienthealthsystemstayid', 'pvr', 'pvri', 'svr', 'svri', 'vitalaperiodicid']\n", + "2024-12-14 23:38:53.756 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, cardiacinput, cardiacoutput, noninvasivediastolic, noninvasivemean, noninvasivesystolic, observationEnteredTimestamp, paop, patienthealthsystemstayid, pvr, pvri, svr, svri, vitalaperiodicid\n", + "2024-12-14 23:38:53.756 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:53.756 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 23:38:53.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.110275\n", + "2024-12-14 23:38:53.866 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/.[0-274088).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/.[0-274088).parquet_cache/locks/2024-12-14T23:38:53.755524.json\n", + "2024-12-14 23:38:53.869 | INFO | MEDS_transforms.extract.shard_events:main:383 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab.\n", + "2024-12-14 23:38:53.870 | INFO | MEDS_transforms.extract.shard_events:main:385 - Performing preliminary read of /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet to determine row count.\n", + "2024-12-14 23:38:53.873 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:53.873 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.338 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['airwayposition', 'airwaysize', 'airwaytype', 'cpaplimit', 'cuffpressure', 'hiexhmvlimit', 'highironoxlimit', 'hipeakpreslimit', 'hirespratelimit', 'lowexhmvlimit', 'lowexhtvlimit', 'lowironoxlimit', 'lowpeakpreslimit', 'lowrespratelimit', 'meanairwaypreslimit', 'patienthealthsystemstayid', 'peeplimit', 'respCareStatusEnteredTimestamp', 'respcareid', 'setapneafio2', 
'setapneaie', 'setapneainsptime', 'setapneainterval', 'setapneaippeephigh', 'setapneapeakflow', 'setapnearr', 'setapneatv', 'sighpreslimit', 'ventEndTimestamp', 'ventStartTimestamp']\n", - "2024-12-14 17:20:14.339 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, airwayposition, airwaysize, airwaytype, cpaplimit, cuffpressure, hiexhmvlimit, highironoxlimit, hipeakpreslimit, hirespratelimit, lowexhmvlimit, lowexhtvlimit, lowironoxlimit, lowpeakpreslimit, lowrespratelimit, meanairwaypreslimit, patienthealthsystemstayid, peeplimit, respCareStatusEnteredTimestamp, respcareid, setapneafio2, setapneaie, setapneainsptime, setapneainterval, setapneaippeephigh, setapneapeakflow, setapnearr, setapneatv, sighpreslimit, ventEndTimestamp, ventStartTimestamp\n", - "2024-12-14 17:20:14.339 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 5436 rows from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet.\n", - "2024-12-14 17:20:14.339 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet into 1 row-chunks of size 200000000.\n", - "2024-12-14 17:20:14.339 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet row-chunk [0-5436) to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet.\n", - "2024-12-14 17:20:14.341 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:14.341778. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:14.342 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet\n", - "2024-12-14 17:20:14.342 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", - "2024-12-14 17:20:14.342 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/respiratoryCare.parquet as Parquet with kwargs:\n", + "2024-12-14 23:38:53.873 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['labResultDrawnTimestamp', 'labmeasurenameinterface', 'labmeasurenamesystem', 'labname', 'labresult', 'labresulttext', 'labtypeid', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:53.873 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, labResultDrawnTimestamp, labmeasurenameinterface, labmeasurenamesystem, labname, labresult, labresulttext, labtypeid, patienthealthsystemstayid\n", + "2024-12-14 23:38:53.889 | INFO | MEDS_transforms.extract.shard_events:main:405 - Read 434660 rows from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet.\n", + "2024-12-14 23:38:53.889 | INFO | MEDS_transforms.extract.shard_events:main:409 - Splitting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet into 1 row-chunks of size 200000000.\n", + "2024-12-14 23:38:53.890 | INFO | MEDS_transforms.extract.shard_events:main:416 - Writing file 1/1: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet row-chunk [0-434660) to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet.\n", + "2024-12-14 23:38:53.894 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:53.894665. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:53.895 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet\n", + "2024-12-14 23:38:53.895 | INFO | MEDS_transforms.extract.shard_events:scan_with_row_idx:148 - Ignoring infer_schema_length=999999999 for Parquet files.\n", + "2024-12-14 23:38:53.895 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:150 - Reading /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/lab.parquet as Parquet with kwargs:\n", ".\n", - "2024-12-14 17:20:14.342 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['airwayposition', 'airwaysize', 'airwaytype', 'cpaplimit', 'cuffpressure', 'hiexhmvlimit', 'highironoxlimit', 'hipeakpreslimit', 'hirespratelimit', 'lowexhmvlimit', 'lowexhtvlimit', 'lowironoxlimit', 'lowpeakpreslimit', 'lowrespratelimit', 'meanairwaypreslimit', 'patienthealthsystemstayid', 'peeplimit', 'respCareStatusEnteredTimestamp', 'respcareid', 'setapneafio2', 'setapneaie', 'setapneainsptime', 'setapneainterval', 'setapneaippeephigh', 'setapneapeakflow', 'setapnearr', 'setapneatv', 'sighpreslimit', 'ventEndTimestamp', 'ventStartTimestamp']\n", - "2024-12-14 17:20:14.342 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, airwayposition, airwaysize, airwaytype, cpaplimit, cuffpressure, hiexhmvlimit, highironoxlimit, hipeakpreslimit, hirespratelimit, lowexhmvlimit, lowexhtvlimit, lowironoxlimit, lowpeakpreslimit, lowrespratelimit, meanairwaypreslimit, patienthealthsystemstayid, peeplimit, respCareStatusEnteredTimestamp, respcareid, setapneafio2, setapneaie, setapneainsptime, setapneainterval, setapneaippeephigh, setapneapeakflow, setapnearr, setapneatv, sighpreslimit, 
ventEndTimestamp, ventStartTimestamp\n", - "2024-12-14 17:20:14.342 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:14.342 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", - "2024-12-14 17:20:14.357 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016024\n", - "2024-12-14 17:20:14.357 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/.[0-5436).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/.[0-5436).parquet_cache/locks/2024-12-14T17:20:14.341778.json\n", - "2024-12-14 17:20:14.358 | INFO | MEDS_transforms.extract.shard_events:main:430 - Sub-sharding completed in 0:00:01.286791\n", + "2024-12-14 23:38:53.895 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:157 - Selecting columns: ['labResultDrawnTimestamp', 'labmeasurenameinterface', 'labmeasurenamesystem', 'labname', 'labresult', 'labresulttext', 'labtypeid', 'patienthealthsystemstayid']\n", + "2024-12-14 23:38:53.895 | DEBUG | MEDS_transforms.extract.shard_events:scan_with_row_idx:162 - Returning df with columns: __row_idx__, labResultDrawnTimestamp, labmeasurenameinterface, labmeasurenamesystem, labname, labresult, labresulttext, labtypeid, patienthealthsystemstayid\n", + "2024-12-14 23:38:53.895 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:53.896 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", + "2024-12-14 23:38:54.059 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.164809\n", + "2024-12-14 23:38:54.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/.[0-434660).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/.[0-434660).parquet_cache/locks/2024-12-14T23:38:53.894665.json\n", + "2024-12-14 23:38:54.059 | INFO | MEDS_transforms.extract.shard_events:main:430 - Sub-sharding completed in 0:00:02.568015\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:14.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: split_and_shard_subjects\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:14.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-split_and_shard_subjects --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=split_and_shard_subjects\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:15.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 23:38:54.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: split_and_shard_subjects\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:54.191\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-split_and_shard_subjects 
--config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=split_and_shard_subjects\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:55.748\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:15.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 17:20:14.978 | INFO | MEDS_transforms.utils:stage_init:73 - Running split_and_shard_subjects with the following configuration:\n", + "\u001b[32m2024-12-14 23:38:55.748\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 23:38:54.895 | INFO | MEDS_transforms.utils:stage_init:73 - Running split_and_shard_subjects with the following configuration:\n", "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", @@ -4705,26 +4723,26 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 17:20:14.991 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 23:38:54.914 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "is_metadata: true\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo//meds//metadata\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo//meds//metadata\n", "n_subjects_per_shard: 10000\n", "external_splits_json_fp: null\n", "split_fracs:\n", " train: 0.5\n", " tuning: 0.25\n", " held_out: 0.25\n", - "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", - "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/metadata\n", - "reducer_output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/metadata\n", + "reducer_output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", "train_only: true\n", "\n", "Paths: (checkbox indicates if it exists)\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", - " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata\n", - " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/metadata\n", - "2024-12-14 17:20:14.992 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:215 - Reading event conversion config from 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/event_configs.yaml (needed for subject ID columns)\n", - "2024-12-14 17:20:15.041 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:219 - Event conversion config:\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata\n", + " - metadata_input_dir: ❌ /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/pre_meds/metadata\n", + "2024-12-14 23:38:54.914 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:215 - Reading event conversion config from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/event_configs.yaml (needed for subject ID columns)\n", + "2024-12-14 23:38:55.000 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:219 - Event conversion config:\n", "subject_id_col: patienthealthsystemstayid\n", "patient:\n", " dob:\n", @@ -5272,61 +5290,61 @@ " vital_id: vitalperiodicid\n", " numeric_value: icp\n", "\n", - "2024-12-14 17:20:15.041 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from patient files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", - "2024-12-14 17:20:15.042 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from admissionDx files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", - "2024-12-14 17:20:15.043 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from allergy files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", - "2024-12-14 17:20:15.043 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading 
subject IDs from carePlanGeneral files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", - "2024-12-14 17:20:15.043 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from carePlanEOL files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", - "2024-12-14 17:20:15.043 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from carePlanGoal files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", - "2024-12-14 17:20:15.044 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from carePlanInfectiousDisease files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", - "2024-12-14 17:20:15.044 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from diagnosis files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", - "2024-12-14 17:20:15.044 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from lab files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", - "2024-12-14 17:20:15.044 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from medication files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", - "2024-12-14 17:20:15.045 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from nurseAssessment files:\n", - " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", - "2024-12-14 17:20:15.045 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from nurseCare files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", - "2024-12-14 17:20:15.045 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from nurseCharting files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", - "2024-12-14 17:20:15.045 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from pastHistory files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", - "2024-12-14 17:20:15.045 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from physicalExam files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", - "2024-12-14 17:20:15.046 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from respiratoryCare files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", - "2024-12-14 17:20:15.046 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from respiratoryCharting files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", - "2024-12-14 17:20:15.046 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from treatment files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", - 
"2024-12-14 17:20:15.046 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from vitalAperiodic files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", - "2024-12-14 17:20:15.047 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from vitalPeriodic files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:15.047 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:239 - Joining all subject IDs from 20 dataframes\n", - "2024-12-14 17:20:15.108 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:247 - Found 2174 unique subject IDs of type int64\n", - "2024-12-14 17:20:15.112 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:262 - Sharding and splitting subjects\n", - "2024-12-14 17:20:15.131 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split train/0 has 1087 subjects.\n", - "2024-12-14 17:20:15.131 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split tuning/0 has 544 subjects.\n", - "2024-12-14 17:20:15.131 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split held_out/0 has 543 subjects.\n", - "2024-12-14 17:20:15.132 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:273 - Writing sharded subjects to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/.shards.json\n", - "2024-12-14 17:20:15.132 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:276 - Done writing sharded subjects\n", + "2024-12-14 23:38:55.001 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from patient files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", + 
"2024-12-14 23:38:55.003 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from admissionDx files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", + "2024-12-14 23:38:55.004 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from allergy files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", + "2024-12-14 23:38:55.005 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from carePlanGeneral files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 23:38:55.006 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from carePlanEOL files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", + "2024-12-14 23:38:55.007 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from carePlanGoal files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 23:38:55.007 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from carePlanInfectiousDisease files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 23:38:55.008 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from diagnosis files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", + "2024-12-14 23:38:55.008 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from lab files:\n", + " - 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", + "2024-12-14 23:38:55.009 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from medication files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", + "2024-12-14 23:38:55.009 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from nurseAssessment files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 23:38:55.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from nurseCare files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", + "2024-12-14 23:38:55.010 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from nurseCharting files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 23:38:55.011 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from pastHistory files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", + "2024-12-14 23:38:55.012 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from physicalExam files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", + "2024-12-14 23:38:55.012 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from respiratoryCare files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 23:38:55.013 | INFO | 
MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from respiratoryCharting files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 23:38:55.014 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from treatment files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", + "2024-12-14 23:38:55.014 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from vitalAperiodic files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 23:38:55.015 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:230 - Reading subject IDs from vitalPeriodic files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:38:55.015 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:239 - Joining all subject IDs from 20 dataframes\n", + "2024-12-14 23:38:55.516 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:247 - Found 2174 unique subject IDs of type int64\n", + "2024-12-14 23:38:55.522 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:262 - Sharding and splitting subjects\n", + "2024-12-14 23:38:55.532 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split train/0 has 1087 subjects.\n", + "2024-12-14 23:38:55.532 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split tuning/0 has 544 subjects.\n", + "2024-12-14 23:38:55.532 | INFO | MEDS_transforms.extract.split_and_shard_subjects:shard_subjects:164 - Split held_out/0 has 543 subjects.\n", + "2024-12-14 23:38:55.533 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:273 - Writing sharded 
subjects to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/.shards.json\n", + "2024-12-14 23:38:55.534 | INFO | MEDS_transforms.extract.split_and_shard_subjects:main:276 - Done writing sharded subjects\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:15.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: convert_to_sharded_events\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:15.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-convert_to_sharded_events --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=convert_to_sharded_events\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:24.817\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 23:38:55.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: convert_to_sharded_events\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:55.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-convert_to_sharded_events --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=convert_to_sharded_events\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:13.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand 
output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:24.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 17:20:15.716 | INFO | MEDS_transforms.utils:stage_init:73 - Running convert_to_sharded_events with the following configuration:\n", + "\u001b[32m2024-12-14 23:39:13.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 23:38:56.475 | INFO | MEDS_transforms.utils:stage_init:73 - Running convert_to_sharded_events with the following configuration:\n", "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -5394,21 +5412,21 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 17:20:15.729 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 23:38:56.495 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "do_dedup_text_and_numeric: true\n", "is_metadata: false\n", - "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", - "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events\n", "reducer_output_dir: null\n", 
"\n", "Paths: (checkbox indicates if it exists)\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", - " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events\n", - " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", - "2024-12-14 17:20:15.730 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:769 - Starting event conversion.\n", - "2024-12-14 17:20:15.730 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:771 - Reading event conversion config from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/event_configs.yaml\n", - "2024-12-14 17:20:15.779 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:773 - Event conversion config:\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events\n", + " - metadata_input_dir: ❌ /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "2024-12-14 23:38:56.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:769 - Starting event conversion.\n", + "2024-12-14 23:38:56.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:771 - Reading event conversion config from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs/event_configs.yaml\n", + "2024-12-14 23:38:56.572 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:773 - Event conversion config:\n", "subject_id_col: patienthealthsystemstayid\n", "patient:\n", " dob:\n", @@ -5956,1159 +5974,1159 @@ " vital_id: vitalperiodicid\n", " numeric_value: icp\n", "\n", - "2024-12-14 17:20:15.793 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/[0-42080).parquet\n", - "2024-12-14 17:20:15.794 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:15.794543. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:15.795 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", - "2024-12-14 17:20:15.796 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:802: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + "2024-12-14 23:38:56.592 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 23:38:56.593 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:56.593349. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:56.595 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 23:38:56.595 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:802: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " typed_subjects = pl.Series(subjects, dtype=df.schema[input_subject_id_column])\n", - "2024-12-14 17:20:15.802 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCare/[0-42080).parquet\n", - "2024-12-14 17:20:15.806 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_performed\n", - "2024-12-14 17:20:15.807 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCarePerformedTimestamp should already be in Date/time format\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:513: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + "2024-12-14 23:38:56.597 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 23:38:56.603 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_performed\n", + "2024-12-14 23:38:56.604 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartPerformedTimestamp should already be in Date/time format\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:513: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " if v not in df.schema:\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:517: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:517: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " is_numeric = df.schema[v].is_numeric()\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:518: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:518: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " is_str = df.schema[v] == pl.Utf8\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:519: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:519: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " is_cat = isinstance(df.schema[v], pl.Categorical)\n", - "2024-12-14 17:20:15.809 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCarePerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:15.809 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_entered\n", - "2024-12-14 17:20:15.809 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCareEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:15.809 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCareEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:15.810 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing 
final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/[0-42080).parquet\n", - "2024-12-14 17:20:15.857 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.063106\n", - "2024-12-14 17:20:15.857 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/.[0-42080).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/.[0-42080).parquet_cache/locks/2024-12-14T17:20:15.794543.json\n", - "2024-12-14 17:20:15.858 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/[0-15).parquet\n", - "2024-12-14 17:20:15.858 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:15.858597. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:15.858 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", - "2024-12-14 17:20:15.858 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:15.859 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanEOL/[0-15).parquet\n", - "2024-12-14 17:20:15.862 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplEolDiscussion\n", - "2024-12-14 17:20:15.862 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanEolDiscussionOccurredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:15.862 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanEolDiscussionOccurredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:15.863 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/[0-15).parquet\n", - "2024-12-14 17:20:15.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007113\n", - "2024-12-14 17:20:15.865 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/.[0-15).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/.[0-15).parquet_cache/locks/2024-12-14T17:20:15.858597.json\n", - "2024-12-14 17:20:15.866 | 
INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/[0-1477163).parquet\n", - "2024-12-14 17:20:15.866 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:15.866664. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:15.866 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", - "2024-12-14 17:20:15.867 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:15.867 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCharting/[0-1477163).parquet\n", - "2024-12-14 17:20:15.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_performed\n", - "2024-12-14 17:20:15.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartPerformedTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:15.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:15.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_entered\n", - "2024-12-14 17:20:15.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartEnteredTimestamp should already be in 
Date/time format\n", - "2024-12-14 17:20:15.872 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:15.872 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/[0-1477163).parquet\n", - "2024-12-14 17:20:16.546 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.680040\n", - "2024-12-14 17:20:16.546 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/.[0-1477163).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/.[0-1477163).parquet_cache/locks/2024-12-14T17:20:15.866664.json\n", - "2024-12-14 17:20:16.548 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/[0-274088).parquet\n", - "2024-12-14 17:20:16.549 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.549008. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:16.549 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", - "2024-12-14 17:20:16.549 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:16.550 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalAperiodic/[0-274088).parquet\n", - "2024-12-14 17:20:16.555 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_systolic\n", - "2024-12-14 17:20:16.555 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_diastolic\n", - "2024-12-14 17:20:16.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_mean\n", - "2024-12-14 17:20:16.557 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting paop\n", - "2024-12-14 17:20:16.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.557 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_output\n", - "2024-12-14 17:20:16.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_input\n", - "2024-12-14 17:20:16.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.558 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.558 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svr\n", - "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svri\n", - "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.559 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvr\n", - "2024-12-14 17:20:16.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.560 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvri\n", - "2024-12-14 17:20:16.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.560 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/[0-274088).parquet\n", - "2024-12-14 17:20:16.862 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.313909\n", - "2024-12-14 17:20:16.863 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/.[0-274088).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/.[0-274088).parquet_cache/locks/2024-12-14T17:20:16.549008.json\n", - "2024-12-14 17:20:16.863 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/[0-91589).parquet\n", - "2024-12-14 17:20:16.864 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.864327. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:16.864 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", - "2024-12-14 17:20:16.864 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:16.865 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseAssessment/[0-91589).parquet\n", - "2024-12-14 17:20:16.868 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_performed\n", - "2024-12-14 17:20:16.869 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessPerformedTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.869 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.869 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_entered\n", - "2024-12-14 17:20:16.869 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.870 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.870 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/[0-91589).parquet\n", - "2024-12-14 17:20:16.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.048560\n", - "2024-12-14 17:20:16.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/.[0-91589).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/.[0-91589).parquet_cache/locks/2024-12-14T17:20:16.864327.json\n", - "2024-12-14 17:20:16.913 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/[0-75604).parquet\n", - "2024-12-14 17:20:16.914 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.914149. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:16.914 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", - "2024-12-14 17:20:16.914 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:16.914 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for medication/[0-75604).parquet\n", - "2024-12-14 17:20:16.918 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_ordered\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:468: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + "2024-12-14 23:38:56.605 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:56.605 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_entered\n", + "2024-12-14 23:38:56.606 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:56.606 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:56.606 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 23:38:56.758 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.164848\n", + "2024-12-14 23:38:56.758 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/.[0-176089).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/.[0-176089).parquet_cache/locks/2024-12-14T23:38:56.593349.json\n", + "2024-12-14 23:38:56.760 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 23:38:56.761 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:56.760897. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:56.761 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 23:38:56.761 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:56.762 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCare/[0-5436).parquet\n", + "2024-12-14 23:38:56.772 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_care_status\n", + "2024-12-14 23:38:56.774 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respCareStatusEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:56.778 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respCareStatusEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:56.779 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_start\n", + "2024-12-14 23:38:56.779 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventStartTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:56.779 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventStartTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:56.779 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_end\n", + "2024-12-14 23:38:56.780 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - 
ventEndTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:56.780 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventEndTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:56.780 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 23:38:56.808 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.047406\n", + "2024-12-14 23:38:56.808 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/.[0-5436).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/.[0-5436).parquet_cache/locks/2024-12-14T23:38:56.760897.json\n", + "2024-12-14 23:38:56.811 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 23:38:56.812 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:56.811872. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:56.812 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 23:38:56.812 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:56.813 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 23:38:56.820 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_systolic\n", + "2024-12-14 23:38:56.820 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:56.821 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:56.821 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_diastolic\n", + "2024-12-14 23:38:56.821 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:56.822 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:56.822 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_mean\n", + "2024-12-14 23:38:56.822 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:56.823 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:56.823 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting paop\n", + "2024-12-14 23:38:56.823 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:56.823 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:56.824 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_output\n", + "2024-12-14 23:38:56.824 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:56.824 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:56.824 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_input\n", + "2024-12-14 23:38:56.825 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:56.825 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:56.825 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svr\n", + "2024-12-14 23:38:56.826 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:56.826 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:56.826 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svri\n", + "2024-12-14 23:38:56.827 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:56.827 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:56.827 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvr\n", + "2024-12-14 23:38:56.828 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:56.828 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:56.828 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvri\n", + "2024-12-14 23:38:56.829 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:56.829 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:56.829 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 23:38:57.465 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.653174\n", + "2024-12-14 23:38:57.465 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/.[0-274088).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/.[0-274088).parquet_cache/locks/2024-12-14T23:38:56.811872.json\n", + "2024-12-14 23:38:57.469 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/[0-75604).parquet\n", + "2024-12-14 23:38:57.470 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:57.469945. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:57.470 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", + "2024-12-14 23:38:57.471 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:57.472 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for medication/[0-75604).parquet\n", + "2024-12-14 23:38:57.483 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_ordered\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:468: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " if col not in df.schema:\n", - "2024-12-14 17:20:16.919 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", - "2024-12-14 17:20:16.919 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugordertimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugordertimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_started\n", - "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", - "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - 
drugstarttimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstarttimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_stopped\n", - "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", - "2024-12-14 17:20:16.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstoptimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.921 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstoptimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.921 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/[0-75604).parquet\n", - "2024-12-14 17:20:16.976 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.062129\n", - "2024-12-14 17:20:16.976 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/.[0-75604).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/.[0-75604).parquet_cache/locks/2024-12-14T17:20:16.914149.json\n", - "2024-12-14 17:20:16.976 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/[0-24978).parquet\n", - "2024-12-14 17:20:16.977 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.977324. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:16.977 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", - "2024-12-14 17:20:16.977 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:16.978 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for diagnosis/[0-24978).parquet\n", - "2024-12-14 17:20:16.981 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", - "2024-12-14 17:20:16.982 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column diagnosispriority\n", - "2024-12-14 17:20:16.982 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd9code\n", - "2024-12-14 17:20:16.982 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - diagnosisEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.982 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"diagnosisEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.982 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/[0-24978).parquet\n", - "2024-12-14 17:20:16.989 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012023\n", - "2024-12-14 17:20:16.989 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/.[0-24978).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/.[0-24978).parquet_cache/locks/2024-12-14T17:20:16.977324.json\n", - "2024-12-14 17:20:16.989 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/[0-2475).parquet\n", - "2024-12-14 17:20:16.990 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.990072. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:16.990 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", - "2024-12-14 17:20:16.990 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:16.990 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for allergy/[0-2475).parquet\n", - "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting allergy\n", - "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergyname\n", - "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergytype\n", - "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - allergyEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"allergyEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:16.994 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/[0-2475).parquet\n", - "2024-12-14 17:20:16.997 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007124\n", - "2024-12-14 17:20:16.997 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/.[0-2475).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/.[0-2475).parquet_cache/locks/2024-12-14T17:20:16.990072.json\n", - "2024-12-14 17:20:16.997 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/[0-112).parquet\n", - "2024-12-14 17:20:16.998 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:16.998026. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:16.998 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", - "2024-12-14 17:20:16.998 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:16.998 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanInfectiousDisease/[0-112).parquet\n", - "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplInfectDisease\n", - "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseaseassessment\n", - "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column responsetotherapy\n", - "2024-12-14 17:20:17.002 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseasesite\n", - "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatment\n", - "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanInfectDiseaseEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanInfectDiseaseEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.002 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/[0-112).parquet\n", - "2024-12-14 17:20:17.004 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006326\n", - "2024-12-14 17:20:17.004 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/.[0-112).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/.[0-112).parquet_cache/locks/2024-12-14T17:20:16.998026.json\n", - "2024-12-14 17:20:17.004 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/[0-84058).parquet\n", - "2024-12-14 17:20:17.005 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.005067. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:17.005 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", - "2024-12-14 17:20:17.005 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:17.005 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for physicalExam/[0-84058).parquet\n", - "2024-12-14 17:20:17.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting physical_exam_entered\n", - "2024-12-14 17:20:17.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - physicalExamEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"physicalExamEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.009 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/[0-84058).parquet\n", - "2024-12-14 17:20:17.032 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.026926\n", - "2024-12-14 17:20:17.032 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/.[0-84058).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/.[0-84058).parquet_cache/locks/2024-12-14T17:20:17.005067.json\n", - "2024-12-14 17:20:17.032 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/[0-38290).parquet\n", - "2024-12-14 17:20:17.033 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.032910. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:17.033 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", - "2024-12-14 17:20:17.033 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:17.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for treatment/[0-38290).parquet\n", - "2024-12-14 17:20:17.037 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting treatment\n", - "2024-12-14 17:20:17.037 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatmentstring\n", - "2024-12-14 17:20:17.037 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - treatmentEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.038 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"treatmentEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.038 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/[0-38290).parquet\n", - "2024-12-14 17:20:17.046 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013724\n", - "2024-12-14 17:20:17.046 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/.[0-38290).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/.[0-38290).parquet_cache/locks/2024-12-14T17:20:17.032910.json\n", - "2024-12-14 17:20:17.047 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/[0-12109).parquet\n", - "2024-12-14 17:20:17.047 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.047613. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:17.047 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", - "2024-12-14 17:20:17.047 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:17.048 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for pastHistory/[0-12109).parquet\n", - "2024-12-14 17:20:17.051 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_taken\n", - "2024-12-14 17:20:17.052 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryTakenTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.052 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryTakenTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.052 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_entered\n", - "2024-12-14 17:20:17.053 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.053 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.053 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/[0-12109).parquet\n", - "2024-12-14 
17:20:17.063 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.015651\n", - "2024-12-14 17:20:17.063 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/.[0-12109).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/.[0-12109).parquet_cache/locks/2024-12-14T17:20:17.047613.json\n", - "2024-12-14 17:20:17.064 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/[0-33148).parquet\n", - "2024-12-14 17:20:17.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.064617. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:17.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", - "2024-12-14 17:20:17.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:17.065 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGeneral/[0-33148).parquet\n", - "2024-12-14 17:20:17.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplItem\n", - "2024-12-14 17:20:17.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgroup\n", - "2024-12-14 17:20:17.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplitemvalue\n", - "2024-12-14 17:20:17.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGeneralItemEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.069 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGeneralItemEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.069 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/[0-33148).parquet\n", - "2024-12-14 17:20:17.075 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010798\n", - "2024-12-14 17:20:17.075 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/.[0-33148).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/.[0-33148).parquet_cache/locks/2024-12-14T17:20:17.064617.json\n", - "2024-12-14 17:20:17.075 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/[0-3633).parquet\n", - "2024-12-14 17:20:17.076 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.076091. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:17.076 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", - "2024-12-14 17:20:17.076 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:17.076 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGoal/[0-3633).parquet\n", - "2024-12-14 17:20:17.079 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplGoal\n", - "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalcategory\n", - "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalvalue\n", - "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
cplgoalstatus\n", - "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGoalEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGoalEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.080 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/[0-3633).parquet\n", - "2024-12-14 17:20:17.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007029\n", - "2024-12-14 17:20:17.083 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/.[0-3633).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/.[0-3633).parquet_cache/locks/2024-12-14T17:20:17.076091.json\n", - "2024-12-14 17:20:17.084 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/[0-5436).parquet\n", - "2024-12-14 17:20:17.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.084405. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:17.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", - "2024-12-14 17:20:17.084 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:17.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCare/[0-5436).parquet\n", - "2024-12-14 17:20:17.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_care_status\n", - "2024-12-14 17:20:17.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respCareStatusEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respCareStatusEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_start\n", - "2024-12-14 17:20:17.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventStartTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventStartTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_end\n", - "2024-12-14 17:20:17.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - 
ventEndTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventEndTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.093 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/[0-5436).parquet\n", - "2024-12-14 17:20:17.103 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.018818\n", - "2024-12-14 17:20:17.103 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/.[0-5436).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/.[0-5436).parquet_cache/locks/2024-12-14T17:20:17.084405.json\n", - "2024-12-14 17:20:17.103 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/[0-7578).parquet\n", - "2024-12-14 17:20:17.103 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.103882. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:17.104 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", - "2024-12-14 17:20:17.104 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:17.104 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for admissionDx/[0-7578).parquet\n", - "2024-12-14 17:20:17.107 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission_diagnosis\n", - "2024-12-14 17:20:17.108 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admitdxname\n", - "2024-12-14 17:20:17.108 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - admitDxEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.108 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admitDxEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.108 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/[0-7578).parquet\n", - "2024-12-14 17:20:17.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007481\n", - "2024-12-14 17:20:17.111 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/.[0-7578).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/.[0-7578).parquet_cache/locks/2024-12-14T17:20:17.103882.json\n", - "2024-12-14 17:20:17.111 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/[0-434660).parquet\n", - "2024-12-14 17:20:17.112 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.112132. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:17.112 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", - "2024-12-14 17:20:17.112 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:17.112 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for lab/[0-434660).parquet\n", - "2024-12-14 17:20:17.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", - "2024-12-14 17:20:17.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labname\n", - "2024-12-14 17:20:17.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenamesystem\n", - "2024-12-14 17:20:17.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenameinterface\n", - "2024-12-14 17:20:17.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - labResultDrawnTimestamp should already be in Date/time format\n", - 
"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:529: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", + "2024-12-14 23:38:57.484 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 23:38:57.484 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugordertimestamp should already be in Date/time format\n", + "2024-12-14 23:38:57.486 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugordertimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:57.486 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_started\n", + "2024-12-14 23:38:57.486 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 23:38:57.487 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstarttimestamp should already be in Date/time format\n", + "2024-12-14 23:38:57.487 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstarttimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:57.487 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_stopped\n", + "2024-12-14 23:38:57.488 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 23:38:57.488 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstoptimestamp should already be in Date/time 
format\n", + "2024-12-14 23:38:57.488 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstoptimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:57.489 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/[0-75604).parquet\n", + "2024-12-14 23:38:57.629 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.159914\n", + "2024-12-14 23:38:57.630 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/.[0-75604).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/.[0-75604).parquet_cache/locks/2024-12-14T23:38:57.469945.json\n", + "2024-12-14 23:38:57.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 23:38:57.632 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:57.631802. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:57.632 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", + "2024-12-14 23:38:57.632 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:57.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCare/[0-42080).parquet\n", + "2024-12-14 23:38:57.639 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_performed\n", + "2024-12-14 23:38:57.639 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCarePerformedTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:57.640 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCarePerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:57.640 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_entered\n", + "2024-12-14 23:38:57.640 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCareEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:57.641 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCareEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:57.641 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 23:38:57.682 | 
INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.050829\n", + "2024-12-14 23:38:57.682 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/.[0-42080).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/.[0-42080).parquet_cache/locks/2024-12-14T23:38:57.631802.json\n", + "2024-12-14 23:38:57.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 23:38:57.684 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:57.684149. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:57.684 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", + "2024-12-14 23:38:57.684 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:57.685 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for admissionDx/[0-7578).parquet\n", + "2024-12-14 23:38:57.690 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission_diagnosis\n", + "2024-12-14 23:38:57.691 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admitdxname\n", + "2024-12-14 23:38:57.691 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - admitDxEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:57.691 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admitDxEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:57.691 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 23:38:57.704 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.020754\n", + "2024-12-14 23:38:57.705 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/.[0-7578).parquet_cache, but clearing lock at 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/.[0-7578).parquet_cache/locks/2024-12-14T23:38:57.684149.json\n", + "2024-12-14 23:38:57.706 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 23:38:57.706 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:57.706700. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:57.707 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", + "2024-12-14 23:38:57.707 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:57.708 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for physicalExam/[0-84058).parquet\n", + "2024-12-14 23:38:57.715 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting physical_exam_entered\n", + "2024-12-14 23:38:57.715 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - physicalExamEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:57.716 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"physicalExamEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:57.716 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 23:38:57.781 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.074725\n", + "2024-12-14 23:38:57.781 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/.[0-84058).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/.[0-84058).parquet_cache/locks/2024-12-14T23:38:57.706700.json\n", + "2024-12-14 23:38:57.783 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 23:38:57.784 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:57.784525. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:57.785 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", + "2024-12-14 23:38:57.785 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:57.788 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for diagnosis/[0-24978).parquet\n", + "2024-12-14 23:38:57.810 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 23:38:57.811 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd9code\n", + "2024-12-14 23:38:57.811 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column diagnosispriority\n", + "2024-12-14 23:38:57.812 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - diagnosisEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:57.812 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"diagnosisEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:57.812 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 23:38:57.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.042326\n", + "2024-12-14 23:38:57.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/.[0-24978).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/.[0-24978).parquet_cache/locks/2024-12-14T23:38:57.784525.json\n", + "2024-12-14 23:38:57.827 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 23:38:57.828 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:57.828158. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:57.828 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", + "2024-12-14 23:38:57.828 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:57.829 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanEOL/[0-15).parquet\n", + "2024-12-14 23:38:57.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplEolDiscussion\n", + "2024-12-14 23:38:57.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanEolDiscussionOccurredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:57.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanEolDiscussionOccurredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 
23:38:57.836 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 23:38:57.841 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013534\n", + "2024-12-14 23:38:57.841 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/.[0-15).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/.[0-15).parquet_cache/locks/2024-12-14T23:38:57.828158.json\n", + "2024-12-14 23:38:57.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 23:38:57.844 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:57.844601. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:57.845 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", + "2024-12-14 23:38:57.845 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:57.846 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for pastHistory/[0-12109).parquet\n", + "2024-12-14 23:38:57.858 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_taken\n", + "2024-12-14 23:38:57.859 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryTakenTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:57.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryTakenTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:57.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_entered\n", + "2024-12-14 23:38:57.862 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:57.864 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:57.864 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 
23:38:57.895 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.051369\n", + "2024-12-14 23:38:57.896 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/.[0-12109).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/.[0-12109).parquet_cache/locks/2024-12-14T23:38:57.844601.json\n", + "2024-12-14 23:38:57.897 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/[0-38290).parquet\n", + "2024-12-14 23:38:57.897 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:57.897717. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:57.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", + "2024-12-14 23:38:57.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:57.898 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for treatment/[0-38290).parquet\n", + "2024-12-14 23:38:57.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting treatment\n", + "2024-12-14 23:38:57.905 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatmentstring\n", + "2024-12-14 23:38:57.905 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - treatmentEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:57.905 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"treatmentEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:57.905 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/[0-38290).parquet\n", + "2024-12-14 23:38:57.927 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.029774\n", + "2024-12-14 23:38:57.927 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/.[0-38290).parquet_cache, but clearing lock at 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/.[0-38290).parquet_cache/locks/2024-12-14T23:38:57.897717.json\n", + "2024-12-14 23:38:57.928 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 23:38:57.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:57.929020. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:57.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 23:38:57.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:57.930 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseAssessment/[0-91589).parquet\n", + "2024-12-14 23:38:57.935 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_performed\n", + "2024-12-14 23:38:57.936 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:57.936 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:57.936 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting 
nurse_assessment_entered\n", + "2024-12-14 23:38:57.937 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:57.937 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:57.937 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 23:38:58.030 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.101323\n", + "2024-12-14 23:38:58.030 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/.[0-91589).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/.[0-91589).parquet_cache/locks/2024-12-14T23:38:57.929020.json\n", + "2024-12-14 23:38:58.031 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/[0-2475).parquet\n", + "2024-12-14 23:38:58.032 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:58.031860. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:58.032 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", + "2024-12-14 23:38:58.032 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:58.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for allergy/[0-2475).parquet\n", + "2024-12-14 23:38:58.038 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting allergy\n", + "2024-12-14 23:38:58.039 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergytype\n", + "2024-12-14 23:38:58.039 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergyname\n", + "2024-12-14 23:38:58.039 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - allergyEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.039 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"allergyEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.039 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/[0-2475).parquet\n", + "2024-12-14 23:38:58.046 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.014604\n", + "2024-12-14 23:38:58.046 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/.[0-2475).parquet_cache, but clearing 
lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/.[0-2475).parquet_cache/locks/2024-12-14T23:38:58.031860.json\n", + "2024-12-14 23:38:58.047 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 23:38:58.048 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:58.048545. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:58.049 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 23:38:58.049 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:58.050 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 23:38:58.056 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplInfectDisease\n", + "2024-12-14 23:38:58.057 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseaseassessment\n", + "2024-12-14 23:38:58.057 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseasesite\n", + "2024-12-14 23:38:58.057 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column responsetotherapy\n", + "2024-12-14 23:38:58.057 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - 
Extracting column treatment\n", + "2024-12-14 23:38:58.057 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanInfectDiseaseEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.057 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanInfectDiseaseEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 23:38:58.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013618\n", + "2024-12-14 23:38:58.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/.[0-112).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/.[0-112).parquet_cache/locks/2024-12-14T23:38:58.048545.json\n", + "2024-12-14 23:38:58.063 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 23:38:58.063 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:58.063626. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:58.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 23:38:58.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:58.064 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 23:38:58.070 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplItem\n", + "2024-12-14 23:38:58.070 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgroup\n", + "2024-12-14 23:38:58.071 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplitemvalue\n", + "2024-12-14 23:38:58.071 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGeneralItemEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.071 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGeneralItemEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.071 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 23:38:58.093 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.029782\n", + "2024-12-14 23:38:58.093 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/.[0-33148).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/.[0-33148).parquet_cache/locks/2024-12-14T23:38:58.063626.json\n", + "2024-12-14 23:38:58.099 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:38:58.100 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:38:58.100250. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:38:58.100 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:38:58.100 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:38:58.101 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:38:58.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting temperature\n", + "2024-12-14 23:38:58.111 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.111 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + 
"2024-12-14 23:38:58.111 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting saO2\n", + "2024-12-14 23:38:58.112 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.112 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.112 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting heartRate\n", + "2024-12-14 23:38:58.112 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.113 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.113 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting respiration\n", + "2024-12-14 23:38:58.113 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.114 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cvp\n", + "2024-12-14 23:38:58.114 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.115 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.115 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting etCo2\n", + "2024-12-14 23:38:58.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.117 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_systolic\n", + "2024-12-14 23:38:58.118 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.118 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.118 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_diastolic\n", + "2024-12-14 23:38:58.119 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.120 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.121 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_mean\n", + "2024-12-14 23:38:58.122 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.123 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.123 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_systolic\n", + "2024-12-14 23:38:58.125 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.126 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.126 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_diastolic\n", + "2024-12-14 23:38:58.128 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.129 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.129 
| INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_mean\n", + "2024-12-14 23:38:58.130 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.131 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.131 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st1\n", + "2024-12-14 23:38:58.132 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.133 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.133 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st2\n", + "2024-12-14 23:38:58.134 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.135 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.135 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st3\n", + "2024-12-14 23:38:58.136 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - 
observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.137 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.137 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ICP\n", + "2024-12-14 23:38:58.138 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:38:58.139 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:38:58.139 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:39:03.492 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:05.392268\n", + "2024-12-14 23:39:03.493 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/.[0-1634960).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/.[0-1634960).parquet_cache/locks/2024-12-14T23:38:58.100250.json\n", + "2024-12-14 23:39:03.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet to events and saving to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/[0-434660).parquet\n", + "2024-12-14 23:39:03.498 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:03.498480. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:03.499 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", + "2024-12-14 23:39:03.499 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:03.502 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for lab/[0-434660).parquet\n", + "2024-12-14 23:39:03.510 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 23:39:03.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenamesystem\n", + "2024-12-14 23:39:03.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenameinterface\n", + "2024-12-14 23:39:03.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labname\n", + "2024-12-14 23:39:03.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - labResultDrawnTimestamp should already be in Date/time format\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/extract/convert_to_sharded_events.py:529: PerformanceWarning: Resolving the schema of a LazyFrame is a potentially expensive operation. 
Use `LazyFrame.collect_schema()` to get the schema without this warning.\n", " case \"text_value\" if not df.schema[v] == pl.Utf8:\n", - "2024-12-14 17:20:17.117 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"labResultDrawnTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.117 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/[0-434660).parquet\n", - "2024-12-14 17:20:17.185 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.073375\n", - "2024-12-14 17:20:17.185 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/.[0-434660).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/.[0-434660).parquet_cache/locks/2024-12-14T17:20:17.112132.json\n", - "2024-12-14 17:20:17.186 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/[0-2520).parquet\n", - "2024-12-14 17:20:17.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.187269. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:17.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", - "2024-12-14 17:20:17.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:17.188 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for patient/[0-2520).parquet\n", - "2024-12-14 17:20:17.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", - "2024-12-14 17:20:17.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - dateofbirth should already be in Date/time format\n", - "2024-12-14 17:20:17.192 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dateofbirth\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", - "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", - "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", - "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ethnicity\n", - "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column ethnicity\n", - "2024-12-14 17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", - "2024-12-14 
17:20:17.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_admission\n", - "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaladmitsource\n", - "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalteachingstatus\n", - "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalregion\n", - "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalnumbedscategory\n", - "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaladmittimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hospitaladmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_discharge\n", - "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargestatus\n", - "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargelocation\n", - "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaldischargetimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"hospitaldischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission\n", - "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitstaytype\n", - "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitadmitsource\n", - "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_weight\n", - "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_height\n", - "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering 
out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge\n", - "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargelocation\n", - "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargestatus\n", - "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge_weight\n", - "2024-12-14 17:20:17.196 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.197 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.197 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/[0-2520).parquet\n", - "2024-12-14 17:20:17.211 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024675\n", - "2024-12-14 17:20:17.212 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/.[0-2520).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/.[0-2520).parquet_cache/locks/2024-12-14T17:20:17.187269.json\n", - "2024-12-14 17:20:17.212 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/[0-176089).parquet\n", - "2024-12-14 17:20:17.212 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.212843. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:17.213 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", - "2024-12-14 17:20:17.213 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:17.213 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCharting/[0-176089).parquet\n", - "2024-12-14 17:20:17.217 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_performed\n", - "2024-12-14 17:20:17.217 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartPerformedTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.217 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null 
times via col(\"respChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.217 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_entered\n", - "2024-12-14 17:20:17.218 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.218 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.218 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/[0-176089).parquet\n", - "2024-12-14 17:20:17.284 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.071501\n", - "2024-12-14 17:20:17.284 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/.[0-176089).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/.[0-176089).parquet_cache/locks/2024-12-14T17:20:17.212843.json\n", - "2024-12-14 17:20:17.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:17.288 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:17.288028. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:17.288 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:17.288 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:17.288 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:17.294 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting temperature\n", - "2024-12-14 17:20:17.294 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting saO2\n", - "2024-12-14 17:20:17.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.295 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting heartRate\n", - 
"2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting respiration\n", - "2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.296 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cvp\n", - "2024-12-14 17:20:17.297 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.297 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.297 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting etCo2\n", - "2024-12-14 17:20:17.297 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.297 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.298 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_systolic\n", - "2024-12-14 17:20:17.298 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.298 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.298 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_diastolic\n", - "2024-12-14 17:20:17.298 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_mean\n", - "2024-12-14 17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 
17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_systolic\n", - "2024-12-14 17:20:17.299 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_diastolic\n", - "2024-12-14 17:20:17.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.300 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_mean\n", - "2024-12-14 17:20:17.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st1\n", - "2024-12-14 17:20:17.301 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.301 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st2\n", - "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st3\n", - "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.302 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ICP\n", - "2024-12-14 17:20:17.303 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:17.303 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out 
rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:17.303 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:20.250 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:02.962939\n", - "2024-12-14 17:20:20.251 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/.[0-1634960).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/.[0-1634960).parquet_cache/locks/2024-12-14T17:20:17.288028.json\n", - "2024-12-14 17:20:20.251 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/[0-42080).parquet\n", - "2024-12-14 17:20:20.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.252404. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", - "2024-12-14 17:20:20.252 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.253 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCare/[0-42080).parquet\n", - "2024-12-14 17:20:20.257 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_performed\n", - "2024-12-14 17:20:20.257 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCarePerformedTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.257 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCarePerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.257 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_entered\n", - "2024-12-14 17:20:20.258 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCareEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.258 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCareEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.258 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/[0-42080).parquet\n", - "2024-12-14 
17:20:20.273 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.021446\n", - "2024-12-14 17:20:20.273 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/.[0-42080).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/.[0-42080).parquet_cache/locks/2024-12-14T17:20:20.252404.json\n", - "2024-12-14 17:20:20.274 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/[0-15).parquet\n", - "2024-12-14 17:20:20.274 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.274563. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.274 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", - "2024-12-14 17:20:20.274 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.275 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanEOL/[0-15).parquet\n", - "2024-12-14 17:20:20.278 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplEolDiscussion\n", - "2024-12-14 17:20:20.278 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanEolDiscussionOccurredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.278 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanEolDiscussionOccurredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.278 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/[0-15).parquet\n", - "2024-12-14 17:20:20.280 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006231\n", - "2024-12-14 17:20:20.280 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/.[0-15).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/.[0-15).parquet_cache/locks/2024-12-14T17:20:20.274563.json\n", - "2024-12-14 
17:20:20.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/[0-1477163).parquet\n", - "2024-12-14 17:20:20.281 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.281752. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.282 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", - "2024-12-14 17:20:20.282 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.282 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCharting/[0-1477163).parquet\n", - "2024-12-14 17:20:20.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_performed\n", - "2024-12-14 17:20:20.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartPerformedTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_entered\n", - "2024-12-14 17:20:20.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartEnteredTimestamp should 
already be in Date/time format\n", - "2024-12-14 17:20:20.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/[0-1477163).parquet\n", - "2024-12-14 17:20:20.622 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.341012\n", - "2024-12-14 17:20:20.622 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/.[0-1477163).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/.[0-1477163).parquet_cache/locks/2024-12-14T17:20:20.281752.json\n", - "2024-12-14 17:20:20.624 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/[0-274088).parquet\n", - "2024-12-14 17:20:20.625 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.625045. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.625 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", - "2024-12-14 17:20:20.625 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.625 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalAperiodic/[0-274088).parquet\n", - "2024-12-14 17:20:20.630 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_systolic\n", - "2024-12-14 17:20:20.630 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_diastolic\n", - "2024-12-14 17:20:20.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_mean\n", - "2024-12-14 17:20:20.632 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting paop\n", - "2024-12-14 17:20:20.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_output\n", - "2024-12-14 17:20:20.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_input\n", - "2024-12-14 17:20:20.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.633 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svr\n", - "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svri\n", - "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.634 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvr\n", - "2024-12-14 17:20:20.635 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.635 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.635 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvri\n", - "2024-12-14 17:20:20.635 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.636 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.636 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/[0-274088).parquet\n", - "2024-12-14 17:20:20.756 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.131885\n", - "2024-12-14 17:20:20.757 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/.[0-274088).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/.[0-274088).parquet_cache/locks/2024-12-14T17:20:20.625045.json\n", - "2024-12-14 17:20:20.757 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/[0-91589).parquet\n", - "2024-12-14 17:20:20.758 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.758208. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.758 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", - "2024-12-14 17:20:20.758 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.758 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseAssessment/[0-91589).parquet\n", - "2024-12-14 17:20:20.763 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_performed\n", - "2024-12-14 17:20:20.763 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessPerformedTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.764 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.764 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_entered\n", - "2024-12-14 17:20:20.764 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.764 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.764 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/[0-91589).parquet\n", - "2024-12-14 17:20:20.785 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.026920\n", - "2024-12-14 17:20:20.785 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/.[0-91589).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/.[0-91589).parquet_cache/locks/2024-12-14T17:20:20.758208.json\n", - "2024-12-14 17:20:20.786 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/[0-75604).parquet\n", - "2024-12-14 17:20:20.786 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.786460. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.786 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", - "2024-12-14 17:20:20.786 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.787 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for medication/[0-75604).parquet\n", - "2024-12-14 17:20:20.791 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_ordered\n", - "2024-12-14 17:20:20.791 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", - "2024-12-14 17:20:20.791 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugordertimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugordertimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_started\n", - "2024-12-14 17:20:20.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", - "2024-12-14 17:20:20.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstarttimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstarttimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.792 | INFO 
| MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_stopped\n", - "2024-12-14 17:20:20.793 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", - "2024-12-14 17:20:20.793 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstoptimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.793 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstoptimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.793 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/[0-75604).parquet\n", - "2024-12-14 17:20:20.825 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.038700\n", - "2024-12-14 17:20:20.825 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/.[0-75604).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/.[0-75604).parquet_cache/locks/2024-12-14T17:20:20.786460.json\n", - "2024-12-14 17:20:20.825 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/[0-24978).parquet\n", - "2024-12-14 17:20:20.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 
17:20:20.826051. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", - "2024-12-14 17:20:20.826 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.826 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for diagnosis/[0-24978).parquet\n", - "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", - "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column diagnosispriority\n", - "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd9code\n", - "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - diagnosisEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"diagnosisEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.830 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/[0-24978).parquet\n", - "2024-12-14 17:20:20.834 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008767\n", - "2024-12-14 17:20:20.834 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/.[0-24978).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/.[0-24978).parquet_cache/locks/2024-12-14T17:20:20.826051.json\n", - "2024-12-14 17:20:20.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/[0-2475).parquet\n", - "2024-12-14 17:20:20.835 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.835439. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.835 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", - "2024-12-14 17:20:20.835 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for allergy/[0-2475).parquet\n", - "2024-12-14 17:20:20.849 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting allergy\n", - "2024-12-14 17:20:20.849 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergyname\n", - "2024-12-14 17:20:20.849 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergytype\n", - "2024-12-14 17:20:20.850 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - allergyEnteredTimestamp should already be in Date/time 
format\n", - "2024-12-14 17:20:20.850 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"allergyEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.850 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/[0-2475).parquet\n", - "2024-12-14 17:20:20.855 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.019692\n", - "2024-12-14 17:20:20.855 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/.[0-2475).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/.[0-2475).parquet_cache/locks/2024-12-14T17:20:20.835439.json\n", - "2024-12-14 17:20:20.858 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/[0-112).parquet\n", - "2024-12-14 17:20:20.860 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.859824. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.861 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", - "2024-12-14 17:20:20.861 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.861 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanInfectiousDisease/[0-112).parquet\n", - "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplInfectDisease\n", - "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseaseassessment\n", - "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column responsetotherapy\n", - "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseasesite\n", - "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatment\n", - "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanInfectDiseaseEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanInfectDiseaseEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.866 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/[0-112).parquet\n", - "2024-12-14 17:20:20.869 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.009974\n", - "2024-12-14 17:20:20.869 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/.[0-112).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/.[0-112).parquet_cache/locks/2024-12-14T17:20:20.859824.json\n", - "2024-12-14 17:20:20.870 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/[0-84058).parquet\n", - "2024-12-14 17:20:20.870 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.870785. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.871 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", - "2024-12-14 17:20:20.871 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.871 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for physicalExam/[0-84058).parquet\n", - "2024-12-14 17:20:20.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting physical_exam_entered\n", - "2024-12-14 17:20:20.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - physicalExamEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.875 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"physicalExamEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.876 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/[0-84058).parquet\n", - "2024-12-14 17:20:20.886 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016078\n", - "2024-12-14 17:20:20.886 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/.[0-84058).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/.[0-84058).parquet_cache/locks/2024-12-14T17:20:20.870785.json\n", - "2024-12-14 
17:20:20.887 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/[0-38290).parquet\n", - "2024-12-14 17:20:20.887 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.887774. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.888 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", - "2024-12-14 17:20:20.888 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.888 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for treatment/[0-38290).parquet\n", - "2024-12-14 17:20:20.892 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting treatment\n", - "2024-12-14 17:20:20.892 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatmentstring\n", - "2024-12-14 17:20:20.892 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - treatmentEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.892 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"treatmentEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.892 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/[0-38290).parquet\n", - "2024-12-14 17:20:20.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010328\n", - "2024-12-14 17:20:20.898 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/.[0-38290).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/.[0-38290).parquet_cache/locks/2024-12-14T17:20:20.887774.json\n", - "2024-12-14 17:20:20.898 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/[0-12109).parquet\n", - "2024-12-14 17:20:20.899 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.899071. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.899 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", - "2024-12-14 17:20:20.899 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.899 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for pastHistory/[0-12109).parquet\n", - "2024-12-14 17:20:20.903 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_taken\n", - "2024-12-14 17:20:20.903 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryTakenTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryTakenTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_entered\n", - "2024-12-14 17:20:20.904 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.905 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.905 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/[0-12109).parquet\n", - 
"2024-12-14 17:20:20.911 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012724\n", - "2024-12-14 17:20:20.911 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/.[0-12109).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/.[0-12109).parquet_cache/locks/2024-12-14T17:20:20.899071.json\n", - "2024-12-14 17:20:20.912 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/[0-33148).parquet\n", - "2024-12-14 17:20:20.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.912592. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", - "2024-12-14 17:20:20.912 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.913 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGeneral/[0-33148).parquet\n", - "2024-12-14 17:20:20.916 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplItem\n", - "2024-12-14 17:20:20.917 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgroup\n", - "2024-12-14 17:20:20.917 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplitemvalue\n", - "2024-12-14 17:20:20.917 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGeneralItemEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.917 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGeneralItemEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.917 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/[0-33148).parquet\n", - "2024-12-14 17:20:20.921 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008875\n", - "2024-12-14 17:20:20.921 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/.[0-33148).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/.[0-33148).parquet_cache/locks/2024-12-14T17:20:20.912592.json\n", - "2024-12-14 17:20:20.921 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/[0-3633).parquet\n", - "2024-12-14 17:20:20.922 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.922179. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.922 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", - "2024-12-14 17:20:20.922 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGoal/[0-3633).parquet\n", - "2024-12-14 17:20:20.926 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplGoal\n", - "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalcategory\n", - "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalvalue\n", - "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
cplgoalstatus\n", - "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGoalEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGoalEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.927 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/[0-3633).parquet\n", - "2024-12-14 17:20:20.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007137\n", - "2024-12-14 17:20:20.929 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/.[0-3633).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/.[0-3633).parquet_cache/locks/2024-12-14T17:20:20.922179.json\n", - "2024-12-14 17:20:20.930 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/[0-5436).parquet\n", - "2024-12-14 17:20:20.930 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.930594. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.930 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", - "2024-12-14 17:20:20.930 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.931 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCare/[0-5436).parquet\n", - "2024-12-14 17:20:20.935 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_care_status\n", - "2024-12-14 17:20:20.935 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respCareStatusEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.938 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respCareStatusEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.938 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_start\n", - "2024-12-14 17:20:20.938 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventStartTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.939 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventStartTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.939 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_end\n", - "2024-12-14 17:20:20.939 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - 
ventEndTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.939 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventEndTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.939 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/[0-5436).parquet\n", - "2024-12-14 17:20:20.948 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.018362\n", - "2024-12-14 17:20:20.949 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/.[0-5436).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/.[0-5436).parquet_cache/locks/2024-12-14T17:20:20.930594.json\n", - "2024-12-14 17:20:20.949 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/[0-7578).parquet\n", - "2024-12-14 17:20:20.949 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.949702. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.949 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", - "2024-12-14 17:20:20.950 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.950 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for admissionDx/[0-7578).parquet\n", - "2024-12-14 17:20:20.953 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission_diagnosis\n", - "2024-12-14 17:20:20.954 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admitdxname\n", - "2024-12-14 17:20:20.954 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - admitDxEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:20.954 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admitDxEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.954 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/[0-7578).parquet\n", - "2024-12-14 17:20:20.957 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007359\n", - "2024-12-14 17:20:20.957 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/.[0-7578).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/.[0-7578).parquet_cache/locks/2024-12-14T17:20:20.949702.json\n", - "2024-12-14 17:20:20.957 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/[0-434660).parquet\n", - "2024-12-14 17:20:20.958 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:20.958027. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:20.958 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", - "2024-12-14 17:20:20.958 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:20.958 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for lab/[0-434660).parquet\n", - "2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", - "2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labname\n", - "2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenamesystem\n", - "2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenameinterface\n", - "2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - labResultDrawnTimestamp should already be in Date/time format\n", - 
"2024-12-14 17:20:20.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"labResultDrawnTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:20.963 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/[0-434660).parquet\n", - "2024-12-14 17:20:20.999 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.041938\n", - "2024-12-14 17:20:21.000 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/.[0-434660).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/.[0-434660).parquet_cache/locks/2024-12-14T17:20:20.958027.json\n", - "2024-12-14 17:20:21.001 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/[0-2520).parquet\n", - "2024-12-14 17:20:21.002 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:21.001855. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:21.002 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", - "2024-12-14 17:20:21.002 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:21.002 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for patient/[0-2520).parquet\n", - "2024-12-14 17:20:21.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", - "2024-12-14 17:20:21.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - dateofbirth should already be in Date/time format\n", - "2024-12-14 17:20:21.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dateofbirth\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", - "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", - "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", - "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ethnicity\n", - "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column ethnicity\n", - "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", - "2024-12-14 
17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_admission\n", - "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaladmitsource\n", - "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalteachingstatus\n", - "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalregion\n", - "2024-12-14 17:20:21.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalnumbedscategory\n", - "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaladmittimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hospitaladmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_discharge\n", - "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargestatus\n", - "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargelocation\n", - "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaldischargetimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"hospitaldischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission\n", - "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitstaytype\n", - "2024-12-14 17:20:21.009 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitadmitsource\n", - "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_weight\n", - "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_height\n", - "2024-12-14 17:20:21.010 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering 
out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge\n", - "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargelocation\n", - "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargestatus\n", - "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge_weight\n", - "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.011 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/[0-2520).parquet\n", - "2024-12-14 17:20:21.027 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.025725\n", - "2024-12-14 17:20:21.027 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/.[0-2520).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/.[0-2520).parquet_cache/locks/2024-12-14T17:20:21.001855.json\n", - "2024-12-14 17:20:21.028 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/[0-176089).parquet\n", - "2024-12-14 17:20:21.028 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:21.028566. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:21.028 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", - "2024-12-14 17:20:21.028 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:21.029 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCharting/[0-176089).parquet\n", - "2024-12-14 17:20:21.032 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_performed\n", - "2024-12-14 17:20:21.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartPerformedTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows 
with null times via col(\"respChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.033 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_entered\n", - "2024-12-14 17:20:21.034 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.034 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.034 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/[0-176089).parquet\n", - "2024-12-14 17:20:21.075 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.046489\n", - "2024-12-14 17:20:21.075 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/.[0-176089).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/.[0-176089).parquet_cache/locks/2024-12-14T17:20:21.028566.json\n", - "2024-12-14 17:20:21.077 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:21.078 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:21.078339. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:21.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:21.078 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:21.079 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:21.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting temperature\n", - "2024-12-14 17:20:21.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting saO2\n", - "2024-12-14 17:20:21.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting heartRate\n", - 
"2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting respiration\n", - "2024-12-14 17:20:21.087 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cvp\n", - "2024-12-14 17:20:21.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.088 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting etCo2\n", - "2024-12-14 17:20:21.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.089 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_systolic\n", - "2024-12-14 17:20:21.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.089 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_diastolic\n", - "2024-12-14 17:20:21.090 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.090 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.090 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_mean\n", - "2024-12-14 17:20:21.090 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 
17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_systolic\n", - "2024-12-14 17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_diastolic\n", - "2024-12-14 17:20:21.091 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_mean\n", - "2024-12-14 17:20:21.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st1\n", - "2024-12-14 17:20:21.093 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st2\n", - "2024-12-14 17:20:21.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st3\n", - "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ICP\n", - "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out 
rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:21.094 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:22.505 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:01.427590\n", - "2024-12-14 17:20:22.506 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/.[0-1634960).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/.[0-1634960).parquet_cache/locks/2024-12-14T17:20:21.078339.json\n", - "2024-12-14 17:20:22.507 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/[0-42080).parquet\n", - "2024-12-14 17:20:22.507 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.507490. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:22.507 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", - "2024-12-14 17:20:22.507 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:22.508 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCare/[0-42080).parquet\n", - "2024-12-14 17:20:22.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_performed\n", - "2024-12-14 17:20:22.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCarePerformedTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:22.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCarePerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.512 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_entered\n", - "2024-12-14 17:20:22.513 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCareEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:22.513 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCareEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.513 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/[0-42080).parquet\n", - "2024-12-14 
17:20:22.523 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.015754\n", - "2024-12-14 17:20:22.523 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/.[0-42080).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/.[0-42080).parquet_cache/locks/2024-12-14T17:20:22.507490.json\n", - "2024-12-14 17:20:22.523 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/[0-15).parquet\n", - "2024-12-14 17:20:22.524 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.524001. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:22.524 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", - "2024-12-14 17:20:22.524 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:22.524 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanEOL/[0-15).parquet\n", - "2024-12-14 17:20:22.528 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplEolDiscussion\n", - "2024-12-14 17:20:22.528 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanEolDiscussionOccurredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:22.528 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanEolDiscussionOccurredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.528 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/[0-15).parquet\n", - "2024-12-14 17:20:22.529 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.005803\n", - "2024-12-14 17:20:22.529 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/.[0-15).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/.[0-15).parquet_cache/locks/2024-12-14T17:20:22.524001.json\n", - "2024-12-14 17:20:22.530 | 
INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/[0-1477163).parquet\n", - "2024-12-14 17:20:22.530 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.530690. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:22.530 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", - "2024-12-14 17:20:22.530 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:22.531 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCharting/[0-1477163).parquet\n", - "2024-12-14 17:20:22.534 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_performed\n", - "2024-12-14 17:20:22.535 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartPerformedTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:22.535 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.535 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_entered\n", - "2024-12-14 17:20:22.536 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartEnteredTimestamp should already be in 
Date/time format\n", - "2024-12-14 17:20:22.536 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.536 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/[0-1477163).parquet\n", - "2024-12-14 17:20:22.829 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.299273\n", - "2024-12-14 17:20:22.830 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/.[0-1477163).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/.[0-1477163).parquet_cache/locks/2024-12-14T17:20:22.530690.json\n", - "2024-12-14 17:20:22.832 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/[0-274088).parquet\n", - "2024-12-14 17:20:22.832 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.832372. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:22.832 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", - "2024-12-14 17:20:22.832 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:22.833 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalAperiodic/[0-274088).parquet\n", - "2024-12-14 17:20:22.838 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_systolic\n", - "2024-12-14 17:20:22.838 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:22.838 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.838 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_diastolic\n", - "2024-12-14 17:20:22.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:22.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_mean\n", - "2024-12-14 17:20:22.839 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:22.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.839 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting paop\n", - "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_output\n", - "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.840 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_input\n", - "2024-12-14 17:20:22.841 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:22.841 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.841 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svr\n", - "2024-12-14 17:20:22.841 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:22.841 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.841 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svri\n", - "2024-12-14 17:20:22.842 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:22.842 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.842 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvr\n", - "2024-12-14 17:20:22.842 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:22.842 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.843 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvri\n", - "2024-12-14 17:20:22.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:22.843 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.843 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/[0-274088).parquet\n", - "2024-12-14 17:20:22.960 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.128224\n", - "2024-12-14 17:20:22.960 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/.[0-274088).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/.[0-274088).parquet_cache/locks/2024-12-14T17:20:22.832372.json\n", - "2024-12-14 17:20:22.961 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/[0-91589).parquet\n", - "2024-12-14 17:20:22.962 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.962000. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:22.962 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", - "2024-12-14 17:20:22.962 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:22.962 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseAssessment/[0-91589).parquet\n", - "2024-12-14 17:20:22.966 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_performed\n", - "2024-12-14 17:20:22.966 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessPerformedTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:22.967 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.967 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_entered\n", - "2024-12-14 17:20:22.967 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:22.967 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:22.968 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/[0-91589).parquet\n", - "2024-12-14 17:20:22.990 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.028308\n", - "2024-12-14 17:20:22.991 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/.[0-91589).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/.[0-91589).parquet_cache/locks/2024-12-14T17:20:22.962000.json\n", - "2024-12-14 17:20:22.995 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/[0-75604).parquet\n", - "2024-12-14 17:20:22.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:22.996131. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:22.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", - "2024-12-14 17:20:22.996 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:22.998 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for medication/[0-75604).parquet\n", - "2024-12-14 17:20:23.005 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_ordered\n", - "2024-12-14 17:20:23.006 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", - "2024-12-14 17:20:23.006 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugordertimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugordertimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_started\n", - "2024-12-14 17:20:23.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", - "2024-12-14 17:20:23.007 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstarttimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstarttimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.008 | INFO 
| MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_stopped\n", - "2024-12-14 17:20:23.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", - "2024-12-14 17:20:23.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstoptimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.008 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstoptimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.008 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/[0-75604).parquet\n", - "2024-12-14 17:20:23.039 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.043148\n", - "2024-12-14 17:20:23.039 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/.[0-75604).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/.[0-75604).parquet_cache/locks/2024-12-14T17:20:22.996131.json\n", - "2024-12-14 17:20:23.039 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/[0-24978).parquet\n", - "2024-12-14 17:20:23.040 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.040126. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:23.040 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", - "2024-12-14 17:20:23.040 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:23.040 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for diagnosis/[0-24978).parquet\n", - "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", - "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column diagnosispriority\n", - "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd9code\n", - "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - diagnosisEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"diagnosisEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/[0-24978).parquet\n", - "2024-12-14 17:20:23.049 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008941\n", - "2024-12-14 17:20:23.049 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/.[0-24978).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/.[0-24978).parquet_cache/locks/2024-12-14T17:20:23.040126.json\n", - "2024-12-14 17:20:23.049 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/[0-2475).parquet\n", - "2024-12-14 17:20:23.050 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.049948. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:23.050 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", - "2024-12-14 17:20:23.050 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:23.050 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for allergy/[0-2475).parquet\n", - "2024-12-14 17:20:23.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting allergy\n", - "2024-12-14 17:20:23.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergyname\n", - "2024-12-14 17:20:23.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergytype\n", - "2024-12-14 17:20:23.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - allergyEnteredTimestamp should already be in Date/time 
format\n", - "2024-12-14 17:20:23.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"allergyEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.055 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/[0-2475).parquet\n", - "2024-12-14 17:20:23.056 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007014\n", - "2024-12-14 17:20:23.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/.[0-2475).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/.[0-2475).parquet_cache/locks/2024-12-14T17:20:23.049948.json\n", - "2024-12-14 17:20:23.057 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/[0-112).parquet\n", - "2024-12-14 17:20:23.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.057704. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:23.057 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", - "2024-12-14 17:20:23.058 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:23.058 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanInfectiousDisease/[0-112).parquet\n", - "2024-12-14 17:20:23.061 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplInfectDisease\n", - "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseaseassessment\n", - "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column responsetotherapy\n", - "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseasesite\n", - "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatment\n", - "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanInfectDiseaseEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanInfectDiseaseEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.062 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/[0-112).parquet\n", - "2024-12-14 17:20:23.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.006415\n", - "2024-12-14 17:20:23.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/.[0-112).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/.[0-112).parquet_cache/locks/2024-12-14T17:20:23.057704.json\n", - "2024-12-14 17:20:23.064 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/[0-84058).parquet\n", - "2024-12-14 17:20:23.064 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.064773. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:23.065 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", - "2024-12-14 17:20:23.065 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:23.065 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for physicalExam/[0-84058).parquet\n", - "2024-12-14 17:20:23.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting physical_exam_entered\n", - "2024-12-14 17:20:23.069 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - physicalExamEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.069 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"physicalExamEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.069 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/[0-84058).parquet\n", - "2024-12-14 17:20:23.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016276\n", - "2024-12-14 17:20:23.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/.[0-84058).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/.[0-84058).parquet_cache/locks/2024-12-14T17:20:23.064773.json\n", - "2024-12-14 
17:20:23.081 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/[0-38290).parquet\n", - "2024-12-14 17:20:23.081 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.081778. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:23.082 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", - "2024-12-14 17:20:23.082 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:23.082 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for treatment/[0-38290).parquet\n", - "2024-12-14 17:20:23.085 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting treatment\n", - "2024-12-14 17:20:23.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatmentstring\n", - "2024-12-14 17:20:23.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - treatmentEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.086 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"treatmentEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.086 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/[0-38290).parquet\n", - "2024-12-14 17:20:23.091 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.010117\n", - "2024-12-14 17:20:23.091 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/.[0-38290).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/.[0-38290).parquet_cache/locks/2024-12-14T17:20:23.081778.json\n", - "2024-12-14 17:20:23.092 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/[0-12109).parquet\n", - "2024-12-14 17:20:23.092 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.092753. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:23.092 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", - "2024-12-14 17:20:23.093 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:23.093 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for pastHistory/[0-12109).parquet\n", - "2024-12-14 17:20:23.096 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_taken\n", - "2024-12-14 17:20:23.097 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryTakenTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.097 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryTakenTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.097 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_entered\n", - "2024-12-14 17:20:23.098 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.098 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.098 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/[0-12109).parquet\n", - "2024-12-14 
17:20:23.105 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012818\n", - "2024-12-14 17:20:23.105 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/.[0-12109).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/.[0-12109).parquet_cache/locks/2024-12-14T17:20:23.092753.json\n", - "2024-12-14 17:20:23.106 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/[0-33148).parquet\n", - "2024-12-14 17:20:23.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.106417. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:23.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", - "2024-12-14 17:20:23.106 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:23.106 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGeneral/[0-33148).parquet\n", - "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplItem\n", - "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgroup\n", - "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplitemvalue\n", - "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGeneralItemEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGeneralItemEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.110 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/[0-33148).parquet\n", - "2024-12-14 17:20:23.115 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.008873\n", - "2024-12-14 17:20:23.115 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/.[0-33148).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/.[0-33148).parquet_cache/locks/2024-12-14T17:20:23.106417.json\n", - "2024-12-14 17:20:23.115 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/[0-3633).parquet\n", - "2024-12-14 17:20:23.116 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.116227. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:23.116 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", - "2024-12-14 17:20:23.116 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:23.116 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGoal/[0-3633).parquet\n", - "2024-12-14 17:20:23.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplGoal\n", - "2024-12-14 17:20:23.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalcategory\n", - "2024-12-14 17:20:23.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalvalue\n", - "2024-12-14 17:20:23.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column 
cplgoalstatus\n", - "2024-12-14 17:20:23.120 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGoalEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.121 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGoalEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.121 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/[0-3633).parquet\n", - "2024-12-14 17:20:23.123 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007319\n", - "2024-12-14 17:20:23.123 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/.[0-3633).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/.[0-3633).parquet_cache/locks/2024-12-14T17:20:23.116227.json\n", - "2024-12-14 17:20:23.124 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/[0-5436).parquet\n", - "2024-12-14 17:20:23.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.124966. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:23.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", - "2024-12-14 17:20:23.125 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:23.125 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCare/[0-5436).parquet\n", - "2024-12-14 17:20:23.130 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_care_status\n", - "2024-12-14 17:20:23.130 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respCareStatusEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.133 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respCareStatusEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.133 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_start\n", - "2024-12-14 17:20:23.133 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventStartTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.134 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventStartTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.134 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_end\n", - "2024-12-14 17:20:23.134 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - 
ventEndTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.134 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventEndTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.134 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/[0-5436).parquet\n", - "2024-12-14 17:20:23.143 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.018378\n", - "2024-12-14 17:20:23.143 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/.[0-5436).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/.[0-5436).parquet_cache/locks/2024-12-14T17:20:23.124966.json\n", - "2024-12-14 17:20:23.143 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/[0-7578).parquet\n", - "2024-12-14 17:20:23.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.144139. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:23.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", - "2024-12-14 17:20:23.144 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:23.144 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for admissionDx/[0-7578).parquet\n", - "2024-12-14 17:20:23.148 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission_diagnosis\n", - "2024-12-14 17:20:23.148 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admitdxname\n", - "2024-12-14 17:20:23.148 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - admitDxEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.149 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admitDxEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.149 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/[0-7578).parquet\n", - "2024-12-14 17:20:23.151 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.007778\n", - "2024-12-14 17:20:23.151 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/.[0-7578).parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/.[0-7578).parquet_cache/locks/2024-12-14T17:20:23.144139.json\n", - "2024-12-14 17:20:23.152 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/[0-434660).parquet\n", - "2024-12-14 17:20:23.152 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.152636. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:23.152 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", - "2024-12-14 17:20:23.152 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:23.153 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for lab/[0-434660).parquet\n", - "2024-12-14 17:20:23.156 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", - "2024-12-14 17:20:23.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labname\n", - "2024-12-14 17:20:23.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenamesystem\n", - "2024-12-14 17:20:23.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenameinterface\n", - "2024-12-14 17:20:23.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - labResultDrawnTimestamp should already be in Date/time format\n", - 
"2024-12-14 17:20:23.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"labResultDrawnTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.157 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/[0-434660).parquet\n", - "2024-12-14 17:20:23.193 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.040408\n", - "2024-12-14 17:20:23.193 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/.[0-434660).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/.[0-434660).parquet_cache/locks/2024-12-14T17:20:23.152636.json\n", - "2024-12-14 17:20:23.194 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/[0-2520).parquet\n", - "2024-12-14 17:20:23.195 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.195057. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:23.195 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", - "2024-12-14 17:20:23.195 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:23.195 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for patient/[0-2520).parquet\n", - "2024-12-14 17:20:23.200 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", - "2024-12-14 17:20:23.200 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - dateofbirth should already be in Date/time format\n", - "2024-12-14 17:20:23.200 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dateofbirth\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.200 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", - "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", - "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", - "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ethnicity\n", - "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column ethnicity\n", - "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", - "2024-12-14 
17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_admission\n", - "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaladmitsource\n", - "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalteachingstatus\n", - "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalregion\n", - "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalnumbedscategory\n", - "2024-12-14 17:20:23.201 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaladmittimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hospitaladmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_discharge\n", - "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargestatus\n", - "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargelocation\n", - "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaldischargetimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"hospitaldischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission\n", - "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitstaytype\n", - "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitadmitsource\n", - "2024-12-14 17:20:23.202 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_weight\n", - "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_height\n", - "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering 
out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.203 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge\n", - "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargelocation\n", - "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargestatus\n", - "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge_weight\n", - "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.204 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/[0-2520).parquet\n", - "2024-12-14 17:20:23.220 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024942\n", - "2024-12-14 17:20:23.220 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/.[0-2520).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/.[0-2520).parquet_cache/locks/2024-12-14T17:20:23.195057.json\n", - "2024-12-14 17:20:23.220 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/[0-176089).parquet\n", - "2024-12-14 17:20:23.221 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.221246. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:23.221 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", - "2024-12-14 17:20:23.221 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:23.222 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCharting/[0-176089).parquet\n", - "2024-12-14 17:20:23.226 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_performed\n", - "2024-12-14 17:20:23.226 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartPerformedTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.226 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with 
null times via col(\"respChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_entered\n", - "2024-12-14 17:20:23.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.227 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.227 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/[0-176089).parquet\n", - "2024-12-14 17:20:23.268 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.047219\n", - "2024-12-14 17:20:23.268 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/.[0-176089).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/.[0-176089).parquet_cache/locks/2024-12-14T17:20:23.221246.json\n", - "2024-12-14 17:20:23.271 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet to events and saving to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:23.272 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:23.271971. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:23.272 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:23.272 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:23.272 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:23.278 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting temperature\n", - "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting saO2\n", - "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.279 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting heartRate\n", - 
"2024-12-14 17:20:23.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting respiration\n", - "2024-12-14 17:20:23.280 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cvp\n", - "2024-12-14 17:20:23.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting etCo2\n", - "2024-12-14 17:20:23.282 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.282 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.282 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_systolic\n", - "2024-12-14 17:20:23.282 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.283 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.283 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_diastolic\n", - "2024-12-14 17:20:23.283 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.283 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.283 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_mean\n", - "2024-12-14 17:20:23.284 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.284 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 
17:20:23.284 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_systolic\n", - "2024-12-14 17:20:23.284 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.284 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_diastolic\n", - "2024-12-14 17:20:23.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_mean\n", - "2024-12-14 17:20:23.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st1\n", - "2024-12-14 17:20:23.286 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st2\n", - "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st3\n", - "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ICP\n", - "2024-12-14 17:20:23.288 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", - "2024-12-14 17:20:23.288 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out 
rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", - "2024-12-14 17:20:23.288 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:24.698 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:01.426391\n", - "2024-12-14 17:20:24.698 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/.[0-1634960).parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/.[0-1634960).parquet_cache/locks/2024-12-14T17:20:23.271971.json\n", - "2024-12-14 17:20:24.698 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:823 - Subsharded into converted events.\n", + "2024-12-14 23:39:03.513 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"labResultDrawnTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:03.514 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/[0-434660).parquet\n", + "2024-12-14 23:39:03.675 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.176762\n", + "2024-12-14 23:39:03.675 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/.[0-434660).parquet_cache, but clearing lock at 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/.[0-434660).parquet_cache/locks/2024-12-14T23:39:03.498480.json\n", + "2024-12-14 23:39:03.678 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/[0-2520).parquet\n", + "2024-12-14 23:39:03.679 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:03.679061. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:03.679 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", + "2024-12-14 23:39:03.679 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:03.680 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for patient/[0-2520).parquet\n", + "2024-12-14 23:39:03.692 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 23:39:03.693 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - dateofbirth should already be in Date/time format\n", + "2024-12-14 23:39:03.693 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dateofbirth\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:03.693 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 23:39:03.694 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 23:39:03.694 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 23:39:03.694 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ethnicity\n", + "2024-12-14 23:39:03.695 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column ethnicity\n", + "2024-12-14 23:39:03.695 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 23:39:03.695 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_admission\n", + "2024-12-14 23:39:03.695 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalteachingstatus\n", + "2024-12-14 23:39:03.696 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaladmitsource\n", + "2024-12-14 23:39:03.696 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalnumbedscategory\n", + "2024-12-14 23:39:03.696 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalregion\n", + "2024-12-14 23:39:03.696 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaladmittimestamp should already be in Date/time format\n", + "2024-12-14 23:39:03.696 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hospitaladmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:03.696 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph 
for extracting hosp_discharge\n", + "2024-12-14 23:39:03.697 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargestatus\n", + "2024-12-14 23:39:03.697 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargelocation\n", + "2024-12-14 23:39:03.697 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaldischargetimestamp should already be in Date/time format\n", + "2024-12-14 23:39:03.697 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hospitaldischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:03.697 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission\n", + "2024-12-14 23:39:03.698 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitstaytype\n", + "2024-12-14 23:39:03.698 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitadmitsource\n", + "2024-12-14 23:39:03.698 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 23:39:03.698 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:03.698 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_weight\n", + "2024-12-14 23:39:03.699 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 23:39:03.699 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:03.699 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_height\n", + "2024-12-14 23:39:03.700 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 23:39:03.700 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:03.700 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge\n", + "2024-12-14 23:39:03.700 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargelocation\n", + "2024-12-14 23:39:03.701 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargestatus\n", + "2024-12-14 23:39:03.701 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 23:39:03.701 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:03.701 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge_weight\n", + "2024-12-14 23:39:03.701 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time 
format\n", + "2024-12-14 23:39:03.701 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:03.702 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/[0-2520).parquet\n", + "2024-12-14 23:39:03.742 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.063086\n", + "2024-12-14 23:39:03.742 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/.[0-2520).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/.[0-2520).parquet_cache/locks/2024-12-14T23:39:03.679061.json\n", + "2024-12-14 23:39:03.744 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 23:39:03.745 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:03.744919. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:03.745 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 23:39:03.745 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:03.746 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCharting/[0-1477163).parquet\n", + "2024-12-14 23:39:03.755 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_performed\n", + "2024-12-14 23:39:03.756 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:03.757 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:03.757 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_entered\n", + "2024-12-14 23:39:03.757 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:03.758 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:03.758 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 23:39:04.884 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:01.139674\n", + "2024-12-14 23:39:04.884 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/.[0-1477163).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/.[0-1477163).parquet_cache/locks/2024-12-14T23:39:03.744919.json\n", + "2024-12-14 23:39:04.885 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 23:39:04.886 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:04.886413. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:04.886 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 23:39:04.887 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:04.887 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGoal/[0-3633).parquet\n", + "2024-12-14 23:39:04.893 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplGoal\n", + "2024-12-14 23:39:04.893 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalstatus\n", + "2024-12-14 23:39:04.893 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalcategory\n", + "2024-12-14 23:39:04.893 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalvalue\n", + "2024-12-14 23:39:04.894 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGoalEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:04.894 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGoalEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:04.894 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 23:39:04.904 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.018455\n", + "2024-12-14 23:39:04.905 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/.[0-3633).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/.[0-3633).parquet_cache/locks/2024-12-14T23:39:04.886413.json\n", + "2024-12-14 23:39:04.907 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 23:39:04.909 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:04.909015. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:04.909 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 23:39:04.910 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:04.910 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 23:39:04.919 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_performed\n", + "2024-12-14 23:39:04.919 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:04.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"respChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:04.920 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_entered\n", + "2024-12-14 23:39:04.921 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:04.921 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:04.922 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 23:39:05.009 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.100303\n", + "2024-12-14 23:39:05.009 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/.[0-176089).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/.[0-176089).parquet_cache/locks/2024-12-14T23:39:04.909015.json\n", + "2024-12-14 23:39:05.011 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 23:39:05.011 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - 
Registered lock at 2024-12-14 23:39:05.011822. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:05.012 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 23:39:05.012 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:05.013 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCare/[0-5436).parquet\n", + "2024-12-14 23:39:05.019 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_care_status\n", + "2024-12-14 23:39:05.020 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respCareStatusEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.023 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respCareStatusEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.024 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_start\n", + "2024-12-14 23:39:05.024 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventStartTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.024 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventStartTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.024 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_end\n", + "2024-12-14 23:39:05.025 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventEndTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.025 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventEndTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.025 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 23:39:05.051 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.039451\n", + "2024-12-14 23:39:05.051 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/.[0-5436).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/.[0-5436).parquet_cache/locks/2024-12-14T23:39:05.011822.json\n", + "2024-12-14 23:39:05.054 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 23:39:05.055 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:05.055380. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:05.055 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 23:39:05.055 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:05.056 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 23:39:05.063 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_systolic\n", + "2024-12-14 23:39:05.063 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.064 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.064 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_diastolic\n", + "2024-12-14 23:39:05.064 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.065 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.065 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_mean\n", + "2024-12-14 23:39:05.065 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting paop\n", + "2024-12-14 23:39:05.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.066 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.067 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_output\n", + "2024-12-14 23:39:05.067 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.067 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.067 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_input\n", + "2024-12-14 23:39:05.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.068 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.068 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svr\n", + "2024-12-14 23:39:05.069 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.069 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.069 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svri\n", + "2024-12-14 23:39:05.070 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.070 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.070 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvr\n", + "2024-12-14 23:39:05.071 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.071 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.071 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvri\n", + "2024-12-14 23:39:05.071 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.072 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.072 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 23:39:05.321 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.266513\n", + "2024-12-14 23:39:05.322 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/.[0-274088).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/.[0-274088).parquet_cache/locks/2024-12-14T23:39:05.055380.json\n", + "2024-12-14 23:39:05.323 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/[0-75604).parquet\n", + "2024-12-14 23:39:05.324 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:05.324044. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:05.324 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", + "2024-12-14 23:39:05.324 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:05.325 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for medication/[0-75604).parquet\n", + "2024-12-14 23:39:05.331 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_ordered\n", + "2024-12-14 23:39:05.331 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 23:39:05.332 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugordertimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.332 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugordertimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_started\n", + "2024-12-14 23:39:05.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 23:39:05.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstarttimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstarttimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.333 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_stopped\n", + "2024-12-14 23:39:05.334 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 23:39:05.334 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstoptimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.334 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstoptimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.335 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/[0-75604).parquet\n", + "2024-12-14 23:39:05.411 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.087250\n", + "2024-12-14 23:39:05.411 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/.[0-75604).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/.[0-75604).parquet_cache/locks/2024-12-14T23:39:05.324044.json\n", + "2024-12-14 23:39:05.412 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 23:39:05.413 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:05.413230. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:05.413 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", + "2024-12-14 23:39:05.413 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:05.414 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCare/[0-42080).parquet\n", + "2024-12-14 23:39:05.420 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_performed\n", + "2024-12-14 23:39:05.421 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCarePerformedTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.421 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCarePerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.421 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_entered\n", + "2024-12-14 23:39:05.422 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCareEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.422 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCareEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.422 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 23:39:05.456 
| INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.042881\n", + "2024-12-14 23:39:05.456 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/.[0-42080).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/.[0-42080).parquet_cache/locks/2024-12-14T23:39:05.413230.json\n", + "2024-12-14 23:39:05.457 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 23:39:05.457 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:05.457731. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:05.458 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", + "2024-12-14 23:39:05.458 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:05.458 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for admissionDx/[0-7578).parquet\n", + "2024-12-14 23:39:05.464 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission_diagnosis\n", + "2024-12-14 23:39:05.464 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admitdxname\n", + "2024-12-14 23:39:05.465 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - admitDxEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.465 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admitDxEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.465 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 23:39:05.481 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024060\n", + "2024-12-14 23:39:05.481 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/.[0-7578).parquet_cache, but clearing lock at 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/.[0-7578).parquet_cache/locks/2024-12-14T23:39:05.457731.json\n", + "2024-12-14 23:39:05.482 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 23:39:05.483 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:05.483301. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:05.483 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", + "2024-12-14 23:39:05.483 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:05.484 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for physicalExam/[0-84058).parquet\n", + "2024-12-14 23:39:05.489 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting physical_exam_entered\n", + "2024-12-14 23:39:05.490 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - physicalExamEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.490 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"physicalExamEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.490 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 23:39:05.520 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.036863\n", + "2024-12-14 23:39:05.520 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/.[0-84058).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/.[0-84058).parquet_cache/locks/2024-12-14T23:39:05.483301.json\n", + "2024-12-14 23:39:05.521 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 23:39:05.522 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:05.522549. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:05.523 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", + "2024-12-14 23:39:05.523 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:05.524 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for diagnosis/[0-24978).parquet\n", + "2024-12-14 23:39:05.533 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 23:39:05.534 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd9code\n", + "2024-12-14 23:39:05.534 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column diagnosispriority\n", + "2024-12-14 23:39:05.534 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - diagnosisEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.534 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"diagnosisEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.534 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 23:39:05.552 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.029433\n", + "2024-12-14 23:39:05.552 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/.[0-24978).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/.[0-24978).parquet_cache/locks/2024-12-14T23:39:05.522549.json\n", + "2024-12-14 23:39:05.552 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 23:39:05.553 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:05.553446. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:05.553 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", + "2024-12-14 23:39:05.554 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:05.554 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanEOL/[0-15).parquet\n", + "2024-12-14 23:39:05.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplEolDiscussion\n", + "2024-12-14 23:39:05.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanEolDiscussionOccurredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.560 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanEolDiscussionOccurredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + 
"2024-12-14 23:39:05.560 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 23:39:05.566 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.012657\n", + "2024-12-14 23:39:05.566 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/.[0-15).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/.[0-15).parquet_cache/locks/2024-12-14T23:39:05.553446.json\n", + "2024-12-14 23:39:05.567 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 23:39:05.568 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:05.568603. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:05.569 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", + "2024-12-14 23:39:05.569 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:05.569 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for pastHistory/[0-12109).parquet\n", + "2024-12-14 23:39:05.577 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_taken\n", + "2024-12-14 23:39:05.577 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryTakenTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.578 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryTakenTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.578 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_entered\n", + "2024-12-14 23:39:05.579 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.580 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.580 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 
23:39:05.607 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.038943\n", + "2024-12-14 23:39:05.607 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/.[0-12109).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/.[0-12109).parquet_cache/locks/2024-12-14T23:39:05.568603.json\n", + "2024-12-14 23:39:05.608 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/[0-38290).parquet\n", + "2024-12-14 23:39:05.609 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:05.609104. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:05.609 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", + "2024-12-14 23:39:05.609 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:05.610 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for treatment/[0-38290).parquet\n", + "2024-12-14 23:39:05.615 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting treatment\n", + "2024-12-14 23:39:05.616 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatmentstring\n", + "2024-12-14 23:39:05.616 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - treatmentEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.616 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"treatmentEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.616 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/[0-38290).parquet\n", + "2024-12-14 23:39:05.631 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.022700\n", + "2024-12-14 23:39:05.632 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/.[0-38290).parquet_cache, but clearing lock at 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/.[0-38290).parquet_cache/locks/2024-12-14T23:39:05.609104.json\n", + "2024-12-14 23:39:05.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 23:39:05.634 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:05.634678. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:05.635 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 23:39:05.635 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:05.635 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseAssessment/[0-91589).parquet\n", + "2024-12-14 23:39:05.645 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_performed\n", + "2024-12-14 23:39:05.646 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.646 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.647 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for 
extracting nurse_assessment_entered\n", + "2024-12-14 23:39:05.647 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.648 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.648 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 23:39:05.694 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.060287\n", + "2024-12-14 23:39:05.695 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/.[0-91589).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/.[0-91589).parquet_cache/locks/2024-12-14T23:39:05.634678.json\n", + "2024-12-14 23:39:05.696 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/[0-2475).parquet\n", + "2024-12-14 23:39:05.696 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:05.696688. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:05.697 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", + "2024-12-14 23:39:05.697 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:05.697 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for allergy/[0-2475).parquet\n", + "2024-12-14 23:39:05.703 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting allergy\n", + "2024-12-14 23:39:05.704 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergytype\n", + "2024-12-14 23:39:05.704 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergyname\n", + "2024-12-14 23:39:05.704 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - allergyEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.704 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"allergyEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.704 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/[0-2475).parquet\n", + "2024-12-14 23:39:05.710 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013415\n", + "2024-12-14 23:39:05.710 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/.[0-2475).parquet_cache, but 
clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/.[0-2475).parquet_cache/locks/2024-12-14T23:39:05.696688.json\n", + "2024-12-14 23:39:05.711 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 23:39:05.712 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:05.712038. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:05.712 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 23:39:05.712 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:05.713 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 23:39:05.719 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplInfectDisease\n", + "2024-12-14 23:39:05.719 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseaseassessment\n", + "2024-12-14 23:39:05.720 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseasesite\n", + "2024-12-14 23:39:05.720 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column responsetotherapy\n", + "2024-12-14 23:39:05.720 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatment\n", + "2024-12-14 23:39:05.720 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanInfectDiseaseEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.720 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanInfectDiseaseEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.720 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 23:39:05.737 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024961\n", + "2024-12-14 23:39:05.737 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/.[0-112).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/.[0-112).parquet_cache/locks/2024-12-14T23:39:05.712038.json\n", + "2024-12-14 23:39:05.738 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 23:39:05.738 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:05.738756. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:05.739 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 23:39:05.739 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:05.740 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 23:39:05.746 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplItem\n", + "2024-12-14 23:39:05.747 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgroup\n", + "2024-12-14 23:39:05.747 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplitemvalue\n", + "2024-12-14 23:39:05.747 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGeneralItemEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.747 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGeneralItemEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.747 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 23:39:05.757 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.018518\n", + "2024-12-14 23:39:05.757 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/.[0-33148).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/.[0-33148).parquet_cache/locks/2024-12-14T23:39:05.738756.json\n", + "2024-12-14 23:39:05.764 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:39:05.765 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:05.764840. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:05.765 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:39:05.765 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:05.766 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:39:05.775 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting temperature\n", + "2024-12-14 23:39:05.775 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.776 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, 
None)).is_not_null()\n", + "2024-12-14 23:39:05.776 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting saO2\n", + "2024-12-14 23:39:05.776 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.777 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.777 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting heartRate\n", + "2024-12-14 23:39:05.777 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.778 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.778 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting respiration\n", + "2024-12-14 23:39:05.778 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.778 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.779 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cvp\n", + "2024-12-14 23:39:05.779 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.779 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.780 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting etCo2\n", + "2024-12-14 23:39:05.780 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.780 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.780 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_systolic\n", + "2024-12-14 23:39:05.781 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.782 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.782 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_diastolic\n", + "2024-12-14 23:39:05.783 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.784 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.784 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_mean\n", + "2024-12-14 23:39:05.784 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.785 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.785 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_systolic\n", + "2024-12-14 23:39:05.786 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.787 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.787 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_diastolic\n", + "2024-12-14 23:39:05.788 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.789 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.791 
| INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_mean\n", + "2024-12-14 23:39:05.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.792 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.793 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st1\n", + "2024-12-14 23:39:05.793 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.794 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.794 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st2\n", + "2024-12-14 23:39:05.795 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.795 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.796 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st3\n", + "2024-12-14 23:39:05.796 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - 
observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.797 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.797 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ICP\n", + "2024-12-14 23:39:05.798 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:05.798 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:05.799 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:39:08.319 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:02.554513\n", + "2024-12-14 23:39:08.319 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/.[0-1634960).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/.[0-1634960).parquet_cache/locks/2024-12-14T23:39:05.764840.json\n", + "2024-12-14 23:39:08.322 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet to events and saving to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/[0-434660).parquet\n", + "2024-12-14 23:39:08.323 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:08.322754. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:08.323 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", + "2024-12-14 23:39:08.323 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:08.324 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for lab/[0-434660).parquet\n", + "2024-12-14 23:39:08.332 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 23:39:08.332 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenamesystem\n", + "2024-12-14 23:39:08.332 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenameinterface\n", + "2024-12-14 23:39:08.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labname\n", + "2024-12-14 23:39:08.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - labResultDrawnTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:08.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"labResultDrawnTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:08.333 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/[0-434660).parquet\n", + "2024-12-14 23:39:08.425 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.103167\n", + "2024-12-14 23:39:08.426 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/.[0-434660).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/.[0-434660).parquet_cache/locks/2024-12-14T23:39:08.322754.json\n", + "2024-12-14 23:39:08.428 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/[0-2520).parquet\n", + "2024-12-14 23:39:08.429 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:08.428997. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:08.429 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", + "2024-12-14 23:39:08.429 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:08.430 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for patient/[0-2520).parquet\n", + "2024-12-14 23:39:08.440 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 23:39:08.441 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - dateofbirth should already be in Date/time format\n", + "2024-12-14 23:39:08.441 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dateofbirth\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:08.441 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 23:39:08.442 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 23:39:08.442 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 23:39:08.442 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ethnicity\n", + "2024-12-14 23:39:08.442 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column ethnicity\n", + "2024-12-14 23:39:08.442 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 23:39:08.443 | 
INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_admission\n", + "2024-12-14 23:39:08.443 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalteachingstatus\n", + "2024-12-14 23:39:08.443 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaladmitsource\n", + "2024-12-14 23:39:08.443 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalnumbedscategory\n", + "2024-12-14 23:39:08.444 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalregion\n", + "2024-12-14 23:39:08.444 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaladmittimestamp should already be in Date/time format\n", + "2024-12-14 23:39:08.444 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hospitaladmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:08.444 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_discharge\n", + "2024-12-14 23:39:08.445 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargestatus\n", + "2024-12-14 23:39:08.445 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargelocation\n", + "2024-12-14 23:39:08.446 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaldischargetimestamp should already be in Date/time format\n", + "2024-12-14 23:39:08.446 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"hospitaldischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:08.446 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission\n", + "2024-12-14 23:39:08.446 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitstaytype\n", + "2024-12-14 23:39:08.446 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitadmitsource\n", + "2024-12-14 23:39:08.447 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 23:39:08.447 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:08.447 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_weight\n", + "2024-12-14 23:39:08.447 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 23:39:08.448 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:08.448 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_height\n", + "2024-12-14 23:39:08.448 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 23:39:08.448 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering 
out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:08.449 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge\n", + "2024-12-14 23:39:08.449 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargelocation\n", + "2024-12-14 23:39:08.449 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargestatus\n", + "2024-12-14 23:39:08.449 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 23:39:08.449 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:08.450 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge_weight\n", + "2024-12-14 23:39:08.450 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 23:39:08.450 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:08.451 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/[0-2520).parquet\n", + "2024-12-14 23:39:08.483 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.054453\n", + "2024-12-14 23:39:08.483 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/.[0-2520).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/.[0-2520).parquet_cache/locks/2024-12-14T23:39:08.428997.json\n", + "2024-12-14 23:39:08.484 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 23:39:08.485 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:08.485522. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:08.486 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 23:39:08.486 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:08.486 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCharting/[0-1477163).parquet\n", + "2024-12-14 23:39:08.495 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_performed\n", + "2024-12-14 23:39:08.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:08.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"nursingChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:08.496 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_entered\n", + "2024-12-14 23:39:08.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:08.497 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:08.498 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 23:39:09.156 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.671273\n", + "2024-12-14 23:39:09.157 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/.[0-1477163).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/.[0-1477163).parquet_cache/locks/2024-12-14T23:39:08.485522.json\n", + "2024-12-14 23:39:09.157 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 23:39:09.158 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 
2024-12-14 23:39:09.158363. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.158 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 23:39:09.158 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.159 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGoal/[0-3633).parquet\n", + "2024-12-14 23:39:09.164 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplGoal\n", + "2024-12-14 23:39:09.165 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalstatus\n", + "2024-12-14 23:39:09.165 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalcategory\n", + "2024-12-14 23:39:09.165 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalvalue\n", + "2024-12-14 23:39:09.165 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGoalEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.165 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGoalEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.165 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 23:39:09.175 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016704\n", + "2024-12-14 23:39:09.175 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/.[0-3633).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/.[0-3633).parquet_cache/locks/2024-12-14T23:39:09.158363.json\n", + "2024-12-14 23:39:09.178 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 23:39:09.178 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:09.178748. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.179 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 23:39:09.179 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.179 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 23:39:09.185 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_performed\n", + "2024-12-14 23:39:09.185 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.186 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"respChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.186 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_charting_entered\n", + "2024-12-14 23:39:09.187 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respChartEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.187 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.187 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/[0-176089).parquet\n", + "2024-12-14 23:39:09.271 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.092834\n", + "2024-12-14 23:39:09.271 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/.[0-176089).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/.[0-176089).parquet_cache/locks/2024-12-14T23:39:09.178748.json\n", + "2024-12-14 23:39:09.273 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 23:39:09.274 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock 
at 2024-12-14 23:39:09.273902. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.274 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 23:39:09.274 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.275 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for respiratoryCare/[0-5436).parquet\n", + "2024-12-14 23:39:09.281 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting resp_care_status\n", + "2024-12-14 23:39:09.282 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - respCareStatusEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"respCareStatusEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.285 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_start\n", + "2024-12-14 23:39:09.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventStartTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventStartTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.286 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting vent_end\n", + "2024-12-14 23:39:09.287 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - ventEndTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.287 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"ventEndTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/[0-5436).parquet\n", + "2024-12-14 23:39:09.314 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.040903\n", + "2024-12-14 23:39:09.314 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/.[0-5436).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/.[0-5436).parquet_cache/locks/2024-12-14T23:39:09.273902.json\n", + "2024-12-14 23:39:09.319 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 23:39:09.320 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:09.320267. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.320 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 23:39:09.320 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.321 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 23:39:09.329 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_systolic\n", + "2024-12-14 23:39:09.330 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.330 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.330 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_diastolic\n", + "2024-12-14 23:39:09.331 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.331 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.331 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting non_invasive_mean\n", + "2024-12-14 23:39:09.332 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.332 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.332 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting paop\n", + "2024-12-14 23:39:09.332 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_output\n", + "2024-12-14 23:39:09.333 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.334 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.334 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cardiac_input\n", + "2024-12-14 23:39:09.334 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.334 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.335 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svr\n", + "2024-12-14 23:39:09.335 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.335 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.336 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting svri\n", + "2024-12-14 23:39:09.336 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.336 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.336 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvr\n", + "2024-12-14 23:39:09.337 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.337 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.337 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pvri\n", + "2024-12-14 23:39:09.338 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.338 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.338 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/[0-274088).parquet\n", + "2024-12-14 23:39:09.543 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.222946\n", + "2024-12-14 23:39:09.543 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/.[0-274088).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/.[0-274088).parquet_cache/locks/2024-12-14T23:39:09.320267.json\n", + "2024-12-14 23:39:09.544 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/[0-75604).parquet\n", + "2024-12-14 23:39:09.545 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:09.545419. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.545 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/medication/[0-75604).parquet\n", + "2024-12-14 23:39:09.546 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.546 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for medication/[0-75604).parquet\n", + "2024-12-14 23:39:09.552 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_ordered\n", + "2024-12-14 23:39:09.553 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 23:39:09.553 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugordertimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.554 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugordertimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.554 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_started\n", + "2024-12-14 23:39:09.555 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 23:39:09.555 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstarttimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.555 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstarttimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.555 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting drug_stopped\n", + "2024-12-14 23:39:09.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column drugname\n", + "2024-12-14 23:39:09.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - drugstoptimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.556 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"drugstoptimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.556 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/[0-75604).parquet\n", + "2024-12-14 23:39:09.637 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.091766\n", + "2024-12-14 23:39:09.637 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/.[0-75604).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/.[0-75604).parquet_cache/locks/2024-12-14T23:39:09.545419.json\n", + "2024-12-14 23:39:09.638 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 23:39:09.639 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:09.639037. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.639 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCare/[0-42080).parquet\n", + "2024-12-14 23:39:09.639 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.640 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCare/[0-42080).parquet\n", + "2024-12-14 23:39:09.646 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_performed\n", + "2024-12-14 23:39:09.646 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCarePerformedTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.647 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCarePerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.647 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_care_entered\n", + "2024-12-14 23:39:09.648 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseCareEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.648 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseCareEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.648 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/[0-42080).parquet\n", + "2024-12-14 23:39:09.675 | 
INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.036044\n", + "2024-12-14 23:39:09.675 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/.[0-42080).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/.[0-42080).parquet_cache/locks/2024-12-14T23:39:09.639037.json\n", + "2024-12-14 23:39:09.675 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 23:39:09.676 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:09.676449. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.676 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/admissionDx/[0-7578).parquet\n", + "2024-12-14 23:39:09.676 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.677 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for admissionDx/[0-7578).parquet\n", + "2024-12-14 23:39:09.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting admission_diagnosis\n", + "2024-12-14 23:39:09.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column admitdxname\n", + "2024-12-14 23:39:09.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - admitDxEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.683 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"admitDxEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.684 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/[0-7578).parquet\n", + "2024-12-14 23:39:09.691 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.015114\n", + "2024-12-14 23:39:09.691 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/.[0-7578).parquet_cache, but clearing lock at 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/.[0-7578).parquet_cache/locks/2024-12-14T23:39:09.676449.json\n", + "2024-12-14 23:39:09.692 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 23:39:09.693 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:09.693671. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.694 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/physicalExam/[0-84058).parquet\n", + "2024-12-14 23:39:09.694 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.695 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for physicalExam/[0-84058).parquet\n", + "2024-12-14 23:39:09.700 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting physical_exam_entered\n", + "2024-12-14 23:39:09.701 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - physicalExamEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.701 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"physicalExamEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.701 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/[0-84058).parquet\n", + "2024-12-14 23:39:09.730 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.036897\n", + "2024-12-14 23:39:09.730 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/.[0-84058).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/.[0-84058).parquet_cache/locks/2024-12-14T23:39:09.693671.json\n", + "2024-12-14 23:39:09.731 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 23:39:09.732 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:09.731879. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.732 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/diagnosis/[0-24978).parquet\n", + "2024-12-14 23:39:09.732 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.732 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for diagnosis/[0-24978).parquet\n", + "2024-12-14 23:39:09.740 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting diagnosis\n", + "2024-12-14 23:39:09.740 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column icd9code\n", + "2024-12-14 23:39:09.740 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column diagnosispriority\n", + "2024-12-14 23:39:09.741 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - diagnosisEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.741 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"diagnosisEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.741 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/[0-24978).parquet\n", + "2024-12-14 23:39:09.756 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.024460\n", + "2024-12-14 23:39:09.756 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/.[0-24978).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/.[0-24978).parquet_cache/locks/2024-12-14T23:39:09.731879.json\n", + "2024-12-14 23:39:09.757 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 23:39:09.757 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:09.757652. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.758 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanEOL/[0-15).parquet\n", + "2024-12-14 23:39:09.758 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.758 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanEOL/[0-15).parquet\n", + "2024-12-14 23:39:09.765 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplEolDiscussion\n", + "2024-12-14 23:39:09.765 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanEolDiscussionOccurredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.765 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanEolDiscussionOccurredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 
23:39:09.765 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/[0-15).parquet\n", + "2024-12-14 23:39:09.769 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.011545\n", + "2024-12-14 23:39:09.769 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/.[0-15).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/.[0-15).parquet_cache/locks/2024-12-14T23:39:09.757652.json\n", + "2024-12-14 23:39:09.771 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 23:39:09.772 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:09.772071. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.772 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/pastHistory/[0-12109).parquet\n", + "2024-12-14 23:39:09.772 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.773 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for pastHistory/[0-12109).parquet\n", + "2024-12-14 23:39:09.780 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_taken\n", + "2024-12-14 23:39:09.781 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryTakenTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.781 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryTakenTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.781 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting past_history_entered\n", + "2024-12-14 23:39:09.782 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - pastHistoryEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.782 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"pastHistoryEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.783 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/[0-12109).parquet\n", + "2024-12-14 
23:39:09.803 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.031252\n", + "2024-12-14 23:39:09.803 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/.[0-12109).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/.[0-12109).parquet_cache/locks/2024-12-14T23:39:09.772071.json\n", + "2024-12-14 23:39:09.804 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/[0-38290).parquet\n", + "2024-12-14 23:39:09.805 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:09.805381. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.806 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/treatment/[0-38290).parquet\n", + "2024-12-14 23:39:09.806 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.806 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for treatment/[0-38290).parquet\n", + "2024-12-14 23:39:09.812 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting treatment\n", + "2024-12-14 23:39:09.812 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatmentstring\n", + "2024-12-14 23:39:09.813 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - treatmentEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.813 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"treatmentEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.813 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/[0-38290).parquet\n", + "2024-12-14 23:39:09.827 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.021636\n", + "2024-12-14 23:39:09.827 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/.[0-38290).parquet_cache, but clearing lock at 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/.[0-38290).parquet_cache/locks/2024-12-14T23:39:09.805381.json\n", + "2024-12-14 23:39:09.828 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 23:39:09.828 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:09.828483. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.828 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 23:39:09.828 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.829 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseAssessment/[0-91589).parquet\n", + "2024-12-14 23:39:09.835 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_assessment_performed\n", + "2024-12-14 23:39:09.836 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.836 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.837 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for 
extracting nurse_assessment_entered\n", + "2024-12-14 23:39:09.837 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nurseAssessEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.837 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nurseAssessEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.838 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/[0-91589).parquet\n", + "2024-12-14 23:39:09.885 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.056697\n", + "2024-12-14 23:39:09.885 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/.[0-91589).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/.[0-91589).parquet_cache/locks/2024-12-14T23:39:09.828483.json\n", + "2024-12-14 23:39:09.886 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/[0-2475).parquet\n", + "2024-12-14 23:39:09.887 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:09.887366. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.887 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/allergy/[0-2475).parquet\n", + "2024-12-14 23:39:09.887 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.888 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for allergy/[0-2475).parquet\n", + "2024-12-14 23:39:09.893 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting allergy\n", + "2024-12-14 23:39:09.894 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergytype\n", + "2024-12-14 23:39:09.894 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column allergyname\n", + "2024-12-14 23:39:09.894 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - allergyEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.894 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"allergyEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.894 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/[0-2475).parquet\n", + "2024-12-14 23:39:09.903 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.016374\n", + "2024-12-14 23:39:09.903 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/.[0-2475).parquet_cache, but 
clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/.[0-2475).parquet_cache/locks/2024-12-14T23:39:09.887366.json\n", + "2024-12-14 23:39:09.905 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 23:39:09.906 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:09.906396. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.907 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 23:39:09.907 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.908 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 23:39:09.915 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplInfectDisease\n", + "2024-12-14 23:39:09.915 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseaseassessment\n", + "2024-12-14 23:39:09.915 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column infectdiseasesite\n", + "2024-12-14 23:39:09.916 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column responsetotherapy\n", + "2024-12-14 23:39:09.916 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column treatment\n", + "2024-12-14 23:39:09.916 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanInfectDiseaseEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.916 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanInfectDiseaseEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.916 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/[0-112).parquet\n", + "2024-12-14 23:39:09.920 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.013937\n", + "2024-12-14 23:39:09.920 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/.[0-112).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/.[0-112).parquet_cache/locks/2024-12-14T23:39:09.906396.json\n", + "2024-12-14 23:39:09.921 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 23:39:09.921 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:09.921481. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.921 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 23:39:09.921 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.922 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 23:39:09.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplItem\n", + "2024-12-14 23:39:09.927 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgroup\n", + "2024-12-14 23:39:09.928 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplitemvalue\n", + "2024-12-14 23:39:09.928 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGeneralItemEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.928 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGeneralItemEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.928 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/[0-33148).parquet\n", + "2024-12-14 23:39:09.939 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.018197\n", + "2024-12-14 23:39:09.939 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/.[0-33148).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/.[0-33148).parquet_cache/locks/2024-12-14T23:39:09.921481.json\n", + "2024-12-14 23:39:09.946 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:39:09.947 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:09.947356. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:09.947 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:39:09.948 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:09.948 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:39:09.964 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting temperature\n", + "2024-12-14 23:39:09.964 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.965 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + 
"2024-12-14 23:39:09.965 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting saO2\n", + "2024-12-14 23:39:09.966 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.967 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.967 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting heartRate\n", + "2024-12-14 23:39:09.968 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.969 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.969 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting respiration\n", + "2024-12-14 23:39:09.970 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.970 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.970 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cvp\n", + "2024-12-14 23:39:09.971 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.972 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.972 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting etCo2\n", + "2024-12-14 23:39:09.973 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.974 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.974 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_systolic\n", + "2024-12-14 23:39:09.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.975 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.976 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_diastolic\n", + "2024-12-14 23:39:09.976 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.977 | INFO | 
MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.977 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting systemic_mean\n", + "2024-12-14 23:39:09.978 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.979 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.979 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_systolic\n", + "2024-12-14 23:39:09.980 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.981 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.981 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_diastolic\n", + "2024-12-14 23:39:09.982 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.983 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.983 
| INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting pa_mean\n", + "2024-12-14 23:39:09.984 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.985 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.985 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st1\n", + "2024-12-14 23:39:09.986 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.987 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.987 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st2\n", + "2024-12-14 23:39:09.988 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.989 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.989 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting st3\n", + "2024-12-14 23:39:09.990 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - 
observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.991 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.991 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ICP\n", + "2024-12-14 23:39:09.992 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - observationEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:09.993 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"observationEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:09.993 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:39:12.473 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:02.525682\n", + "2024-12-14 23:39:12.473 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/.[0-1634960).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/.[0-1634960).parquet_cache/locks/2024-12-14T23:39:09.947356.json\n", + "2024-12-14 23:39:12.475 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet to events and saving to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/[0-434660).parquet\n", + "2024-12-14 23:39:12.476 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:12.476018. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:12.476 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/lab/[0-434660).parquet\n", + "2024-12-14 23:39:12.476 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:12.477 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for lab/[0-434660).parquet\n", + "2024-12-14 23:39:12.484 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting lab\n", + "2024-12-14 23:39:12.484 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenamesystem\n", + "2024-12-14 23:39:12.484 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labmeasurenameinterface\n", + "2024-12-14 23:39:12.484 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column labname\n", + "2024-12-14 23:39:12.485 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - labResultDrawnTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:12.485 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"labResultDrawnTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:12.485 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/[0-434660).parquet\n", + "2024-12-14 23:39:12.563 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.087227\n", + "2024-12-14 23:39:12.563 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/.[0-434660).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/.[0-434660).parquet_cache/locks/2024-12-14T23:39:12.476018.json\n", + "2024-12-14 23:39:12.565 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/[0-2520).parquet\n", + "2024-12-14 23:39:12.566 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:12.566223. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:12.566 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/patient/[0-2520).parquet\n", + "2024-12-14 23:39:12.566 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:12.567 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for patient/[0-2520).parquet\n", + "2024-12-14 23:39:12.574 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting dob\n", + "2024-12-14 23:39:12.575 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - dateofbirth should already be in Date/time format\n", + "2024-12-14 23:39:12.575 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"dateofbirth\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:12.575 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting gender\n", + "2024-12-14 23:39:12.576 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column gender\n", + "2024-12-14 23:39:12.576 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 23:39:12.576 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting ethnicity\n", + "2024-12-14 23:39:12.576 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column ethnicity\n", + "2024-12-14 23:39:12.576 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:493 - Adding null literate for time\n", + "2024-12-14 23:39:12.576 | 
INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_admission\n", + "2024-12-14 23:39:12.577 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalteachingstatus\n", + "2024-12-14 23:39:12.577 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaladmitsource\n", + "2024-12-14 23:39:12.577 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalnumbedscategory\n", + "2024-12-14 23:39:12.577 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitalregion\n", + "2024-12-14 23:39:12.577 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaladmittimestamp should already be in Date/time format\n", + "2024-12-14 23:39:12.578 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"hospitaladmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:12.578 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting hosp_discharge\n", + "2024-12-14 23:39:12.578 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargestatus\n", + "2024-12-14 23:39:12.578 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column hospitaldischargelocation\n", + "2024-12-14 23:39:12.578 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - hospitaldischargetimestamp should already be in Date/time format\n", + "2024-12-14 23:39:12.578 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"hospitaldischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:12.579 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission\n", + "2024-12-14 23:39:12.579 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitstaytype\n", + "2024-12-14 23:39:12.579 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitadmitsource\n", + "2024-12-14 23:39:12.579 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 23:39:12.580 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:12.580 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_weight\n", + "2024-12-14 23:39:12.580 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 23:39:12.580 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:12.580 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_admission_height\n", + "2024-12-14 23:39:12.581 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitadmittimestamp should already be in Date/time format\n", + "2024-12-14 23:39:12.581 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering 
out rows with null times via col(\"unitadmittimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:12.581 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge\n", + "2024-12-14 23:39:12.582 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargelocation\n", + "2024-12-14 23:39:12.582 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column unitdischargestatus\n", + "2024-12-14 23:39:12.582 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 23:39:12.582 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:12.582 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting unit_discharge_weight\n", + "2024-12-14 23:39:12.583 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - unitdischargetimestamp should already be in Date/time format\n", + "2024-12-14 23:39:12.583 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"unitdischargetimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:12.583 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/[0-2520).parquet\n", + "2024-12-14 23:39:12.622 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.056145\n", + "2024-12-14 23:39:12.622 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/.[0-2520).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/.[0-2520).parquet_cache/locks/2024-12-14T23:39:12.566223.json\n", + "2024-12-14 23:39:12.623 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 23:39:12.624 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:12.624165. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:12.624 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 23:39:12.624 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:12.625 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for nurseCharting/[0-1477163).parquet\n", + "2024-12-14 23:39:12.630 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_performed\n", + "2024-12-14 23:39:12.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartPerformedTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:12.631 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via 
col(\"nursingChartPerformedTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:12.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting nurse_charting_entered\n", + "2024-12-14 23:39:12.632 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - nursingChartEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:12.633 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"nursingChartEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:12.633 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/[0-1477163).parquet\n", + "2024-12-14 23:39:13.170 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.546149\n", + "2024-12-14 23:39:13.170 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/.[0-1477163).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/.[0-1477163).parquet_cache/locks/2024-12-14T23:39:12.624165.json\n", + "2024-12-14 23:39:13.171 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:799 - Converting /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet to events and saving to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 23:39:13.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 
2024-12-14 23:39:13.172006. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:13.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/shard_events/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 23:39:13.172 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:13.173 | INFO | MEDS_transforms.extract.convert_to_sharded_events:compute_fn:808 - Extracting events for carePlanGoal/[0-3633).parquet\n", + "2024-12-14 23:39:13.179 | INFO | MEDS_transforms.extract.convert_to_sharded_events:convert_to_events:730 - Building computational graph for extracting cplGoal\n", + "2024-12-14 23:39:13.180 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalstatus\n", + "2024-12-14 23:39:13.180 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalcategory\n", + "2024-12-14 23:39:13.180 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:470 - Extracting column cplgoalvalue\n", + "2024-12-14 23:39:13.180 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:488 - carePlanGoalEnteredTimestamp should already be in Date/time format\n", + "2024-12-14 23:39:13.180 | INFO | MEDS_transforms.extract.convert_to_sharded_events:extract_event:561 - Filtering out rows with null times via col(\"carePlanGoalEnteredTimestamp\").strict_cast(Datetime(Microseconds, None)).is_not_null()\n", + "2024-12-14 23:39:13.180 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/[0-3633).parquet\n", + "2024-12-14 23:39:13.193 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.021035\n", + "2024-12-14 23:39:13.193 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/.[0-3633).parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/.[0-3633).parquet_cache/locks/2024-12-14T23:39:13.172006.json\n", + "2024-12-14 23:39:13.193 | INFO | MEDS_transforms.extract.convert_to_sharded_events:main:823 - Subsharded into converted events.\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:24.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: merge_to_MEDS_cohort\u001b[0m\n", - "\u001b[32m2024-12-14 17:20:24.863\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-merge_to_MEDS_cohort --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=merge_to_MEDS_cohort\u001b[0m\n", - "\u001b[32m2024-12-14 17:21:21.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 23:39:13.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: merge_to_MEDS_cohort\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:13.528\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-merge_to_MEDS_cohort --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 
'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=merge_to_MEDS_cohort\u001b[0m\n", + "\u001b[32m2024-12-14 23:40:52.381\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:21:21.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 17:20:25.493 | INFO | MEDS_transforms.utils:stage_init:73 - Running merge_to_MEDS_cohort with the following configuration:\n", + "\u001b[32m2024-12-14 23:40:52.381\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 23:39:14.481 | INFO | MEDS_transforms.utils:stage_init:73 - Running merge_to_MEDS_cohort with the following configuration:\n", "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", @@ -7176,112 +7194,112 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 17:20:25.508 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 23:39:14.506 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "unique_by: null\n", "additional_sort_by: null\n", "is_metadata: false\n", - "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events\n", - "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", "reducer_output_dir: null\n", "\n", "Paths: (checkbox indicates if it exists)\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events\n", - " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", - " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", - "2024-12-14 17:20:25.523 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator_by_shard_map:687 - Mapping computation over a maximum of 3 shards\n", - "2024-12-14 17:20:25.531 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0 into 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", - "2024-12-14 17:20:25.532 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:25.531974. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:25.533 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0\n", - "2024-12-14 17:20:25.535 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/[0-2520).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/[0-7578).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/[0-2475).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/[0-33148).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/[0-15).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/[0-3633).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/[0-112).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/[0-24978).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/[0-434660).parquet\n", - " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/[0-75604).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/[0-91589).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/[0-42080).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/[0-1477163).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/[0-12109).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/[0-84058).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/[0-5436).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/[0-176089).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/[0-38290).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/[0-274088).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:25.546 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:25.546 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", - "2024-12-14 17:20:39.416 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:13.884657\n", - "2024-12-14 17:20:39.417 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache/locks/2024-12-14T17:20:25.531974.json\n", - "2024-12-14 17:20:39.418 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 17:20:39.419 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:39.419421. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:39.419 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0\n", - "2024-12-14 17:20:39.421 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/[0-2520).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/[0-7578).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/[0-2475).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/[0-33148).parquet\n", - " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/[0-15).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/[0-3633).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/[0-112).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/[0-24978).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/[0-434660).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/[0-75604).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/[0-91589).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/[0-42080).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/[0-1477163).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/[0-12109).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/[0-84058).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/[0-5436).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/[0-176089).parquet\n", - " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/[0-38290).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/[0-274088).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:39.432 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:39.433 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 17:20:49.043 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:09.624267\n", - "2024-12-14 17:20:49.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache/locks/2024-12-14T17:20:39.419421.json\n", - "2024-12-14 17:20:49.045 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0 into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 17:20:49.046 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:20:49.045956. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:20:49.046 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0\n", - "2024-12-14 17:20:49.047 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/[0-2520).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/[0-7578).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/[0-2475).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/[0-33148).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/[0-15).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/[0-3633).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/[0-112).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/[0-24978).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/[0-434660).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/[0-75604).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/[0-91589).parquet\n", - " - 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/[0-42080).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/[0-1477163).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/[0-12109).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/[0-84058).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/[0-5436).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/[0-176089).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/[0-38290).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/[0-274088).parquet\n", - " - /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/[0-1634960).parquet\n", - "2024-12-14 17:20:49.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:20:49.059 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 17:21:20.691 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:31.645753\n", - "2024-12-14 17:21:20.692 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache, but clearing lock at 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache/locks/2024-12-14T17:20:49.045956.json\n", - "2024-12-14 17:21:20.693 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:55.184901\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", + " - metadata_input_dir: ❌ /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "2024-12-14 23:39:14.530 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator_by_shard_map:687 - Mapping computation over a maximum of 3 shards\n", + "2024-12-14 23:39:14.541 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0 into /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 23:39:14.542 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:14.541989. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:14.543 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0\n", + "2024-12-14 23:39:14.547 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/patient/[0-2520).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/admissionDx/[0-7578).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/allergy/[0-2475).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGeneral/[0-33148).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanEOL/[0-15).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanGoal/[0-3633).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/carePlanInfectiousDisease/[0-112).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/diagnosis/[0-24978).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/lab/[0-434660).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/medication/[0-75604).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseAssessment/[0-91589).parquet\n", + " - 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCare/[0-42080).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/nurseCharting/[0-1477163).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/pastHistory/[0-12109).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/physicalExam/[0-84058).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCare/[0-5436).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/respiratoryCharting/[0-176089).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/treatment/[0-38290).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalAperiodic/[0-274088).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/held_out/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:39:14.554 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:14.554 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 23:39:39.028 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:24.486201\n", + "2024-12-14 23:39:39.029 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache, but clearing lock at 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/.0.parquet_cache/locks/2024-12-14T23:39:14.541989.json\n", + "2024-12-14 23:39:39.031 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0 into /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 23:39:39.033 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:39:39.032845. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:39:39.034 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0\n", + "2024-12-14 23:39:39.038 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/patient/[0-2520).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/admissionDx/[0-7578).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/allergy/[0-2475).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGeneral/[0-33148).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanEOL/[0-15).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanGoal/[0-3633).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/carePlanInfectiousDisease/[0-112).parquet\n", + " - 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/diagnosis/[0-24978).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/lab/[0-434660).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/medication/[0-75604).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseAssessment/[0-91589).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCare/[0-42080).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/nurseCharting/[0-1477163).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/pastHistory/[0-12109).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/physicalExam/[0-84058).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCare/[0-5436).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/respiratoryCharting/[0-176089).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/treatment/[0-38290).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalAperiodic/[0-274088).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/tuning/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:39:39.044 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:39:39.045 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing 
final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 23:40:01.470 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:22.437199\n", + "2024-12-14 23:40:01.470 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/.0.parquet_cache/locks/2024-12-14T23:39:39.032845.json\n", + "2024-12-14 23:40:01.472 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0 into /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 23:40:01.473 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:40:01.473220. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:40:01.474 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0\n", + "2024-12-14 23:40:01.477 | INFO | MEDS_transforms.extract.merge_to_MEDS_cohort:merge_subdirs_and_sort:204 - Reading 20 files:\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/patient/[0-2520).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/admissionDx/[0-7578).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/allergy/[0-2475).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGeneral/[0-33148).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanEOL/[0-15).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanGoal/[0-3633).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/carePlanInfectiousDisease/[0-112).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/diagnosis/[0-24978).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/lab/[0-434660).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/medication/[0-75604).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseAssessment/[0-91589).parquet\n", + " - 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCare/[0-42080).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/nurseCharting/[0-1477163).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/pastHistory/[0-12109).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/physicalExam/[0-84058).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCare/[0-5436).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/respiratoryCharting/[0-176089).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/treatment/[0-38290).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalAperiodic/[0-274088).parquet\n", + " - /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/convert_to_sharded_events/train/0/vitalPeriodic/[0-1634960).parquet\n", + "2024-12-14 23:40:01.484 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:40:01.485 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 23:40:52.003 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:50.529674\n", + "2024-12-14 23:40:52.003 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache, but clearing lock at 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/.0.parquet_cache/locks/2024-12-14T23:40:01.473220.json\n", + "2024-12-14 23:40:52.004 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:01:37.497362\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:21:21.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 17:21:21.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_metadata --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_metadata\u001b[0m\n", - "\u001b[32m2024-12-14 17:21:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 23:40:52.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 23:40:52.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_metadata --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_metadata\u001b[0m\n", + "\u001b[32m2024-12-14 23:40:53.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - 
\u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:21:21.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 17:21:21.837 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_metadata with the following configuration:\n", + "\u001b[32m2024-12-14 23:40:53.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 23:40:53.110 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_metadata with the following configuration:\n", "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", @@ -7349,36 +7367,36 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 17:21:21.853 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 23:40:53.131 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "is_metadata: true\n", "do_retype: true\n", - "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", - "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/finalize_MEDS_metadata\n", - "reducer_output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "output_dir: 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/finalize_MEDS_metadata\n", + "reducer_output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata\n", "train_only: true\n", "\n", "Paths: (checkbox indicates if it exists)\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", - " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/finalize_MEDS_metadata\n", - " - metadata_input_dir: ❌ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", - "2024-12-14 17:21:21.857 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:170 - Validating code metadata\n", - "2024-12-14 17:21:21.857 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:179 - No code metadata found at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects/codes.parquet. Making empty metadata file.\n", - "2024-12-14 17:21:21.859 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:183 - Writing finalized metadata df to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/codes.parquet\n", - "2024-12-14 17:21:21.861 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:187 - Creating dataset metadata\n", - "2024-12-14 17:21:21.863 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:199 - Writing finalized dataset metadata to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/dataset.json\n", - "2024-12-14 17:21:21.864 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:204 - Creating subject splits from {str(shards_map_fp.resolve())}\n", - "2024-12-14 17:21:21.865 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split train has 1087 subjects\n", - "2024-12-14 17:21:21.865 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split tuning has 544 subjects\n", - "2024-12-14 
17:21:21.865 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split held_out has 543 subjects\n", - "2024-12-14 17:21:21.865 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:222 - Writing finalized subject splits to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/subject_splits.parquet\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/finalize_MEDS_metadata\n", + " - metadata_input_dir: ❌ /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects\n", + "2024-12-14 23:40:53.137 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:170 - Validating code metadata\n", + "2024-12-14 23:40:53.137 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:179 - No code metadata found at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/split_and_shard_subjects/codes.parquet. 
Making empty metadata file.\n", + "2024-12-14 23:40:53.344 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:183 - Writing finalized metadata df to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/codes.parquet\n", + "2024-12-14 23:40:53.345 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:187 - Creating dataset metadata\n", + "2024-12-14 23:40:53.348 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:199 - Writing finalized dataset metadata to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/dataset.json\n", + "2024-12-14 23:40:53.349 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:204 - Creating subject splits from {str(shards_map_fp.resolve())}\n", + "2024-12-14 23:40:53.350 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split train has 1087 subjects\n", + "2024-12-14 23:40:53.350 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split tuning has 544 subjects\n", + "2024-12-14 23:40:53.350 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:217 - Split held_out has 543 subjects\n", + "2024-12-14 23:40:53.351 | INFO | MEDS_transforms.extract.finalize_MEDS_metadata:main:222 - Writing finalized subject splits to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata/subject_splits.parquet\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:21:21.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_data\u001b[0m\n", - "\u001b[32m2024-12-14 17:21:21.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_data --config-dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 
'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_data\u001b[0m\n", - "\u001b[32m2024-12-14 17:21:45.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "\u001b[32m2024-12-14 23:40:53.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: finalize_MEDS_data\u001b[0m\n", + "\u001b[32m2024-12-14 23:40:53.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_extract-finalize_MEDS_data --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/eICU_Example/configs --config-name=extract_eICU 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=finalize_MEDS_data\u001b[0m\n", + "\u001b[32m2024-12-14 23:41:31.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 17:21:45.019\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", - "2024-12-14 17:21:22.492 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_data with the following configuration:\n", + "\u001b[32m2024-12-14 23:41:31.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "2024-12-14 23:40:54.119 | INFO | MEDS_transforms.utils:stage_init:73 - Running finalize_MEDS_data with the following configuration:\n", "input_dir: ${oc.env:EICU_PRE_MEDS_DIR}\n", "cohort_dir: ${oc.env:EICU_MEDS_COHORT_DIR}\n", "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", @@ -7446,41 +7464,41 @@ "event_conversion_config_fp: ${oc.env:EVENT_CONVERSION_CONFIG_FP}\n", "shards_map_fp: ${cohort_dir}/metadata/.shards.json\n", "\n", - "2024-12-14 17:21:22.508 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "2024-12-14 23:40:54.141 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", "do_retype: true\n", "is_metadata: false\n", - "data_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", - "metadata_input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data\n", "reducer_output_dir: null\n", "\n", "Paths: (checkbox indicates if it exists)\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", - " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data\n", - " - metadata_input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata\n", - "2024-12-14 17:21:22.524 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", - "2024-12-14 17:21:22.532 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/held_out/0.parquet\n", - "2024-12-14 17:21:22.533 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 
17:21:22.533510. Double checking no earlier locks have been registered.\n", - "2024-12-14 17:21:22.535 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", - "2024-12-14 17:21:22.535 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:21:24.217 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/held_out/0.parquet\n", - "2024-12-14 17:21:27.278 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:04.745218\n", - "2024-12-14 17:21:27.278 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/held_out/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/held_out/.0.parquet_cache/locks/2024-12-14T17:21:22.533510.json\n", - "2024-12-14 17:21:27.495 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/0.parquet\n", - "2024-12-14 17:21:27.495 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:21:27.495681. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:21:27.496 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", - "2024-12-14 17:21:27.496 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:21:28.862 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/0.parquet\n", - "2024-12-14 17:21:31.908 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:04.412390\n", - "2024-12-14 17:21:31.908 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/.0.parquet_cache/locks/2024-12-14T17:21:27.495681.json\n", - "2024-12-14 17:21:32.132 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet into /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/0.parquet\n", - "2024-12-14 17:21:32.133 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 17:21:32.133050. 
Double checking no earlier locks have been registered.\n", - "2024-12-14 17:21:32.133 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", - "2024-12-14 17:21:32.133 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", - "2024-12-14 17:21:36.556 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/0.parquet\n", - "2024-12-14 17:21:43.984 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:11.851649\n", - "2024-12-14 17:21:43.984 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/.0.parquet_cache, but clearing lock at /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/.0.parquet_cache/locks/2024-12-14T17:21:32.133050.json\n", - "2024-12-14 17:21:44.856 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:22.347847\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data\n", + " - metadata_input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/metadata\n", + "2024-12-14 23:40:54.165 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", + "2024-12-14 23:40:54.176 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/held_out/0.parquet\n", + "2024-12-14 23:40:54.177 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:40:54.177220. Double checking no earlier locks have been registered.\n", + "2024-12-14 23:40:54.178 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/held_out/0.parquet\n", + "2024-12-14 23:40:54.179 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:40:55.948 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/held_out/0.parquet\n", + "2024-12-14 23:41:03.087 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:08.910657\n", + "2024-12-14 23:41:03.088 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/held_out/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/held_out/.0.parquet_cache/locks/2024-12-14T23:40:54.177220.json\n", + "2024-12-14 23:41:03.720 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/0.parquet\n", + "2024-12-14 23:41:03.721 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:41:03.721072. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:41:03.721 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/tuning/0.parquet\n", + "2024-12-14 23:41:03.722 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:41:05.335 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/0.parquet\n", + "2024-12-14 23:41:11.800 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:08.079782\n", + "2024-12-14 23:41:11.801 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/tuning/.0.parquet_cache/locks/2024-12-14T23:41:03.721072.json\n", + "2024-12-14 23:41:12.356 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/0.parquet\n", + "2024-12-14 23:41:12.358 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-14 23:41:12.358139. 
Double checking no earlier locks have been registered.\n", + "2024-12-14 23:41:12.359 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/merge_to_MEDS_cohort/train/0.parquet\n", + "2024-12-14 23:41:12.359 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-14 23:41:15.666 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/0.parquet\n", + "2024-12-14 23:41:29.647 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:17.288719\n", + "2024-12-14 23:41:29.648 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/eicu_demo/meds/data/train/.0.parquet_cache/locks/2024-12-14T23:41:12.358139.json\n", + "2024-12-14 23:41:30.813 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:36.671912\n", "\u001b[0m\n" ] } @@ -7500,7 +7518,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -7536,7 +7554,7 @@ "└────────────┴─────────────────────┴─────────────────────────────────┴───────────────┘" ] }, - "execution_count": 50, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -7556,7 +7574,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "dev", + "display_name": "meds_demo", "language": "python", "name": "python3" }, diff --git a/demo/meds_tab.ipynb b/demo/meds_tab.ipynb index 4ed2e0b..65a4198 100644 --- a/demo/meds_tab.ipynb +++ b/demo/meds_tab.ipynb @@ -16,57 +16,117 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: meds-tab in 
/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (0.0.7)\n", - "Requirement already satisfied: polars<=1.17.1,>=1.6.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (1.17.1)\n", - "Requirement already satisfied: pyarrow in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (17.0.0)\n", - "Requirement already satisfied: loguru in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (0.7.3)\n", - "Requirement already satisfied: hydra-core==1.3.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (1.3.2)\n", - "Requirement already satisfied: numpy in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (2.2.0)\n", - "Requirement already satisfied: scipy<1.14.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (1.13.1)\n", - "Requirement already satisfied: pandas in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (2.2.3)\n", - "Requirement already satisfied: tqdm in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (4.67.1)\n", - "Requirement already satisfied: xgboost in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (2.1.3)\n", - "Requirement already satisfied: scikit-learn in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (1.6.0)\n", - "Requirement already satisfied: hydra-optuna-sweeper in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (1.2.0)\n", - "Requirement already satisfied: hydra-joblib-launcher in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (1.2.0)\n", - "Requirement already satisfied: ml-mixins in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (0.1.0)\n", - "Requirement already satisfied: meds>=0.3.3 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (0.3.3)\n", 
- "Requirement already satisfied: meds-transforms>=0.0.7 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-tab) (0.0.9)\n", - "Requirement already satisfied: omegaconf<2.4,>=2.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-core==1.3.2->meds-tab) (2.3.0)\n", - "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-core==1.3.2->meds-tab) (4.9.3)\n", - "Requirement already satisfied: packaging in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-core==1.3.2->meds-tab) (24.2)\n", - "Requirement already satisfied: jsonschema>=4.0.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds>=0.3.3->meds-tab) (4.23.0)\n", - "Requirement already satisfied: typing-extensions>=4.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds>=0.3.3->meds-tab) (4.12.2)\n", - "Requirement already satisfied: nested_ragged_tensors>=0.0.8 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds-transforms>=0.0.7->meds-tab) (0.1)\n", - "Requirement already satisfied: joblib>=0.14.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-joblib-launcher->meds-tab) (1.4.2)\n", - "Requirement already satisfied: optuna<3.0.0,>=2.10.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from hydra-optuna-sweeper->meds-tab) (2.10.1)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from pandas->meds-tab) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from pandas->meds-tab) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from pandas->meds-tab) (2024.2)\n", - "Requirement already satisfied: threadpoolctl>=3.1.0 in 
/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from scikit-learn->meds-tab) (3.5.0)\n", - "Requirement already satisfied: attrs>=22.2.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds>=0.3.3->meds-tab) (24.2.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds>=0.3.3->meds-tab) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds>=0.3.3->meds-tab) (0.35.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds>=0.3.3->meds-tab) (0.22.3)\n", - "Requirement already satisfied: safetensors in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from nested_ragged_tensors>=0.0.8->meds-transforms>=0.0.7->meds-tab) (0.4.5)\n", - "Requirement already satisfied: PyYAML>=5.1.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from omegaconf<2.4,>=2.2->hydra-core==1.3.2->meds-tab) (6.0.2)\n", - "Requirement already satisfied: alembic in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (1.14.0)\n", - "Requirement already satisfied: cliff in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (4.8.0)\n", - "Requirement already satisfied: cmaes>=0.8.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (0.11.1)\n", - "Requirement already satisfied: colorlog in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (6.9.0)\n", - "Requirement already satisfied: sqlalchemy>=1.1.0 in 
/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (2.0.36)\n", - "Requirement already satisfied: six>=1.5 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->meds-tab) (1.17.0)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from sqlalchemy>=1.1.0->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (3.1.1)\n", - "Requirement already satisfied: Mako in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from alembic->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (1.3.8)\n", - "Requirement already satisfied: PrettyTable>=0.7.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (3.12.0)\n", - "Requirement already satisfied: autopage>=0.4.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (0.5.2)\n", - "Requirement already satisfied: cmd2>=1.0.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (2.5.7)\n", - "Requirement already satisfied: stevedore>=2.0.1 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (5.4.0)\n", - "Requirement already satisfied: gnureadline in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cmd2>=1.0.0->cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (8.2.13)\n", - "Requirement already satisfied: pyperclip in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cmd2>=1.0.0->cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (1.9.0)\n", - "Requirement already satisfied: wcwidth in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from 
cmd2>=1.0.0->cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (0.2.13)\n", - "Requirement already satisfied: pbr>=2.0.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from stevedore>=2.0.1->cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (6.1.0)\n", - "Requirement already satisfied: MarkupSafe>=0.9.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from Mako->alembic->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (3.0.2)\n" + "Collecting meds-tab\n", + " Downloading meds_tab-0.0.7-py3-none-any.whl.metadata (16 kB)\n", + "Requirement already satisfied: polars<=1.17.1,>=1.6.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-tab) (1.17.1)\n", + "Requirement already satisfied: pyarrow in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-tab) (17.0.0)\n", + "Requirement already satisfied: loguru in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-tab) (0.7.3)\n", + "Requirement already satisfied: hydra-core==1.3.2 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-tab) (1.3.2)\n", + "Requirement already satisfied: numpy in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-tab) (2.2.0)\n", + "Collecting scipy<1.14.0 (from meds-tab)\n", + " Downloading scipy-1.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)\n", + "Collecting pandas (from meds-tab)\n", + " Using cached pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)\n", + "Requirement already satisfied: tqdm in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-tab) (4.67.1)\n", + "Collecting xgboost (from meds-tab)\n", + " Downloading xgboost-2.1.3-py3-none-manylinux_2_28_x86_64.whl.metadata (2.1 kB)\n", + "Collecting scikit-learn (from meds-tab)\n", + " Downloading 
scikit_learn-1.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)\n", + "Collecting hydra-optuna-sweeper (from meds-tab)\n", + " Using cached hydra_optuna_sweeper-1.2.0-py3-none-any.whl.metadata (1.0 kB)\n", + "Collecting hydra-joblib-launcher (from meds-tab)\n", + " Using cached hydra_joblib_launcher-1.2.0-py3-none-any.whl.metadata (1.0 kB)\n", + "Collecting ml-mixins (from meds-tab)\n", + " Using cached ml_mixins-0.1.0-py3-none-any.whl.metadata (3.0 kB)\n", + "Requirement already satisfied: meds>=0.3.3 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-tab) (0.3.3)\n", + "Requirement already satisfied: meds-transforms>=0.0.7 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-tab) (0.0.9)\n", + "Requirement already satisfied: omegaconf<2.4,>=2.2 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-core==1.3.2->meds-tab) (2.3.0)\n", + "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-core==1.3.2->meds-tab) (4.9.3)\n", + "Requirement already satisfied: packaging in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-core==1.3.2->meds-tab) (24.2)\n", + "Requirement already satisfied: jsonschema>=4.0.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds>=0.3.3->meds-tab) (4.23.0)\n", + "Requirement already satisfied: typing-extensions>=4.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds>=0.3.3->meds-tab) (4.12.2)\n", + "Requirement already satisfied: nested_ragged_tensors>=0.0.8 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-transforms>=0.0.7->meds-tab) (0.1)\n", + "Collecting joblib>=0.14.0 (from hydra-joblib-launcher->meds-tab)\n", + " Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)\n", + 
"Collecting optuna<3.0.0,>=2.10.0 (from hydra-optuna-sweeper->meds-tab)\n", + " Using cached optuna-2.10.1-py3-none-any.whl.metadata (15 kB)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from pandas->meds-tab) (2.9.0.post0)\n", + "Collecting pytz>=2020.1 (from pandas->meds-tab)\n", + " Using cached pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)\n", + "Collecting tzdata>=2022.7 (from pandas->meds-tab)\n", + " Using cached tzdata-2024.2-py2.py3-none-any.whl.metadata (1.4 kB)\n", + "Collecting threadpoolctl>=3.1.0 (from scikit-learn->meds-tab)\n", + " Using cached threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)\n", + "Collecting nvidia-nccl-cu12 (from xgboost->meds-tab)\n", + " Using cached nvidia_nccl_cu12-2.23.4-py3-none-manylinux2014_x86_64.whl.metadata (1.8 kB)\n", + "Requirement already satisfied: attrs>=22.2.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds>=0.3.3->meds-tab) (24.2.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds>=0.3.3->meds-tab) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds>=0.3.3->meds-tab) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds>=0.3.3->meds-tab) (0.22.3)\n", + "Requirement already satisfied: safetensors in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from nested_ragged_tensors>=0.0.8->meds-transforms>=0.0.7->meds-tab) (0.4.5)\n", + "Requirement already satisfied: PyYAML>=5.1.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from 
omegaconf<2.4,>=2.2->hydra-core==1.3.2->meds-tab) (6.0.2)\n", + "Collecting alembic (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab)\n", + " Using cached alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)\n", + "Collecting cliff (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab)\n", + " Using cached cliff-4.8.0-py3-none-any.whl.metadata (2.1 kB)\n", + "Collecting cmaes>=0.8.2 (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab)\n", + " Using cached cmaes-0.11.1-py3-none-any.whl.metadata (18 kB)\n", + "Collecting colorlog (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab)\n", + " Using cached colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)\n", + "Collecting sqlalchemy>=1.1.0 (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab)\n", + " Using cached SQLAlchemy-2.0.36-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)\n", + "Requirement already satisfied: six>=1.5 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->meds-tab) (1.17.0)\n", + "Collecting greenlet!=0.4.17 (from sqlalchemy>=1.1.0->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab)\n", + " Using cached greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (3.8 kB)\n", + "Collecting Mako (from alembic->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab)\n", + " Downloading Mako-1.3.8-py3-none-any.whl.metadata (2.9 kB)\n", + "Collecting PrettyTable>=0.7.2 (from cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab)\n", + " Using cached prettytable-3.12.0-py3-none-any.whl.metadata (30 kB)\n", + "Collecting autopage>=0.4.0 (from cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab)\n", + " Using cached autopage-0.5.2-py3-none-any.whl.metadata (7.9 kB)\n", + "Collecting cmd2>=1.0.0 (from cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab)\n", + " Using cached cmd2-2.5.7-py3-none-any.whl.metadata (13 kB)\n", + "Collecting 
stevedore>=2.0.1 (from cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab)\n", + " Using cached stevedore-5.4.0-py3-none-any.whl.metadata (2.3 kB)\n", + "Collecting pyperclip (from cmd2>=1.0.0->cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab)\n", + " Using cached pyperclip-1.9.0-py3-none-any.whl\n", + "Requirement already satisfied: wcwidth in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from cmd2>=1.0.0->cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab) (0.2.13)\n", + "Collecting pbr>=2.0.0 (from stevedore>=2.0.1->cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab)\n", + " Using cached pbr-6.1.0-py2.py3-none-any.whl.metadata (3.4 kB)\n", + "Collecting MarkupSafe>=0.9.2 (from Mako->alembic->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-tab)\n", + " Using cached MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)\n", + "Downloading meds_tab-0.0.7-py3-none-any.whl (65 kB)\n", + "Downloading scipy-1.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.2/38.2 MB\u001b[0m \u001b[31m66.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hUsing cached hydra_joblib_launcher-1.2.0-py3-none-any.whl (5.2 kB)\n", + "Using cached hydra_optuna_sweeper-1.2.0-py3-none-any.whl (8.5 kB)\n", + "Using cached ml_mixins-0.1.0-py3-none-any.whl (8.0 kB)\n", + "Using cached pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.7 MB)\n", + "Downloading scikit_learn-1.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m103.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading xgboost-2.1.3-py3-none-manylinux_2_28_x86_64.whl (153.9 MB)\n", + 
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m153.9/153.9 MB\u001b[0m \u001b[31m106.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hUsing cached joblib-1.4.2-py3-none-any.whl (301 kB)\n", + "Using cached optuna-2.10.1-py3-none-any.whl (308 kB)\n", + "Using cached pytz-2024.2-py2.py3-none-any.whl (508 kB)\n", + "Using cached threadpoolctl-3.5.0-py3-none-any.whl (18 kB)\n", + "Using cached tzdata-2024.2-py2.py3-none-any.whl (346 kB)\n", + "Using cached nvidia_nccl_cu12-2.23.4-py3-none-manylinux2014_x86_64.whl (199.0 MB)\n", + "Using cached cmaes-0.11.1-py3-none-any.whl (35 kB)\n", + "Using cached SQLAlchemy-2.0.36-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)\n", + "Using cached alembic-1.14.0-py3-none-any.whl (233 kB)\n", + "Using cached cliff-4.8.0-py3-none-any.whl (80 kB)\n", + "Using cached colorlog-6.9.0-py3-none-any.whl (11 kB)\n", + "Using cached autopage-0.5.2-py3-none-any.whl (30 kB)\n", + "Using cached cmd2-2.5.7-py3-none-any.whl (152 kB)\n", + "Using cached greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (613 kB)\n", + "Using cached prettytable-3.12.0-py3-none-any.whl (31 kB)\n", + "Using cached stevedore-5.4.0-py3-none-any.whl (49 kB)\n", + "Downloading Mako-1.3.8-py3-none-any.whl (78 kB)\n", + "Using cached MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (23 kB)\n", + "Using cached pbr-6.1.0-py2.py3-none-any.whl (108 kB)\n", + "Installing collected packages: pytz, pyperclip, tzdata, threadpoolctl, scipy, PrettyTable, pbr, nvidia-nccl-cu12, ml-mixins, MarkupSafe, joblib, greenlet, colorlog, cmd2, cmaes, autopage, xgboost, stevedore, sqlalchemy, scikit-learn, pandas, Mako, hydra-joblib-launcher, cliff, alembic, optuna, hydra-optuna-sweeper, meds-tab\n", + "Successfully installed Mako-1.3.8 MarkupSafe-3.0.2 PrettyTable-3.12.0 alembic-1.14.0 autopage-0.5.2 cliff-4.8.0 cmaes-0.11.1 cmd2-2.5.7 
colorlog-6.9.0 greenlet-3.1.1 hydra-joblib-launcher-1.2.0 hydra-optuna-sweeper-1.2.0 joblib-1.4.2 meds-tab-0.0.7 ml-mixins-0.1.0 nvidia-nccl-cu12-2.23.4 optuna-2.10.1 pandas-2.2.3 pbr-6.1.0 pyperclip-1.9.0 pytz-2024.2 scikit-learn-1.6.0 scipy-1.13.1 sqlalchemy-2.0.36 stevedore-5.4.0 threadpoolctl-3.5.0 tzdata-2024.2 xgboost-2.1.3\n" ] } ], @@ -114,9 +174,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m2024-12-14 19:28:24.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning describe_codes with the following configuration:\n", - "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "\u001b[32m2024-12-14 23:35:39.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning describe_codes with the following configuration:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", "do_overwrite: false\n", "seed: 1\n", "tqdm: false\n", @@ -127,9 +187,9 @@ "output_filepath: ${output_dir}/metadata/codes.parquet\n", "name: describe_codes\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.646\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", - "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "\u001b[32m2024-12-14 23:35:39.449\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", "do_overwrite: false\n", "seed: 1\n", "tqdm: false\n", @@ -141,42 +201,42 @@ "name: describe_codes\n", "\n", "Paths: (checkbox indicates if it exists)\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.describe_codes\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m41\u001b[0m - \u001b[1mIterating through shards and caching feature frequencies.\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:28:24.651936. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:34: DataOrientationWarning: Row orientation inferred during DataFrame construction. 
Explicitly specify the orientation by passing `orient=\"row\"` to silence this warning.\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.describe_codes\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m41\u001b[0m - \u001b[1mIterating through shards and caching feature frequencies.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.456\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:35:39.456333. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:34: DataOrientationWarning: Row orientation inferred during DataFrame construction. 
Explicitly specify the orientation by passing `orient=\"row\"` to silence this warning.\n", " return pl.DataFrame([[col, freq] for col, freq in freq_dict.items()], schema=[\"code\", \"count\"])\n", - "\u001b[32m2024-12-14 19:28:24.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.052346\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.938\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.286837\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.938\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/train/.0_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:28:24.939927. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.018227\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.020954\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/tuning/.0_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.961\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:28:24.961781. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.011915\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.013785\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/held_out/.0_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.describe_codes\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m66\u001b[0m - \u001b[1mSumming frequency computations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.976\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:28:24.976571. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + "\u001b[32m2024-12-14 23:35:39.548\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.089958\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.548\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.594\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.137667\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.594\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - 
\u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/tuning/.0_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:35:39.598780. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.112567\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.717\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.118395\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.717\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/train/.0_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.718\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:35:39.718663. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.719\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.719\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.753\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.034429\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.037859\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache/held_out/.0_cache\u001b[0m\n", + 
"\u001b[32m2024-12-14 23:35:39.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.describe_codes\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m66\u001b[0m - \u001b[1mSumming frequency computations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:35:39.758648. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.759\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.759\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. 
Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", " if not df.columns == [\"code\", \"count\"]:\n", - "\u001b[32m2024-12-14 19:28:24.992\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.015360\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.992\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.018957\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/.codes_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:28:24.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.describe_codes\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m94\u001b[0m - \u001b[1mStored feature columns and frequencies.\u001b[0m\n" + "\u001b[32m2024-12-14 23:35:39.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.023492\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", 
+ "\u001b[32m2024-12-14 23:35:39.787\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.029215\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/.codes_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:39.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.describe_codes\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m94\u001b[0m - \u001b[1mStored feature columns and frequencies.\u001b[0m\n" ] } ], @@ -210,7 +270,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -223,9 +283,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m2024-12-14 19:29:06.167\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning tabularize_static with the following configuration:\n", - "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "\u001b[32m2024-12-14 23:35:41.366\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning tabularize_static with the following configuration:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", "do_overwrite: false\n", "seed: 1\n", "tqdm: false\n", @@ -258,9 +318,9 @@ "output_tabularized_dir: 
${output_dir}/tabularize\n", "name: tabularization\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.169\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", - "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "\u001b[32m2024-12-14 23:35:41.368\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", "do_overwrite: false\n", "seed: 1\n", "tqdm: false\n", @@ -294,36 +354,36 @@ "name: tabularization\n", "\n", "Paths: (checkbox indicates if it exists)\n", - " - input_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", - " - tabularization.filtered_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet exists; reading directly and returning.\u001b[0m\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a 
potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " - input_code_metadata_fp: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - tabularization.filtered_code_metadata_fp: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:41.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet exists; reading directly and returning.\u001b[0m\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", " if not df.columns == [\"code\", \"count\"]:\n", - "\u001b[32m2024-12-14 19:29:06.191\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:06.190860. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.367\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_static_features\u001b[0m:\u001b[36mget_sparse_static_rep\u001b[0m:\u001b[36m69\u001b[0m - \u001b[1mMake static data sparse and merge it with the time-series data\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.092494\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/none/static/present.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.462\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.271271\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.462\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/none/static/.present_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.463\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:06.463428. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.463\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_static_features\u001b[0m:\u001b[36mget_sparse_static_rep\u001b[0m:\u001b[36m69\u001b[0m - \u001b[1mMake static data sparse and merge it with the time-series data\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.096838\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/none/static/present.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.699\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 
0:00:00.236471\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.699\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/none/static/.present_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:06.700988. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:06.859\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_static_features\u001b[0m:\u001b[36mget_sparse_static_rep\u001b[0m:\u001b[36m69\u001b[0m - \u001b[1mMake static data sparse and merge it with the time-series data\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:07.036\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.199972\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:07.036\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/none/static/present.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:07.037\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.336466\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:07.037\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/none/static/.present_cache\u001b[0m\n" + "\u001b[32m2024-12-14 23:35:41.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:35:41.394003. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:41.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:41.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:41.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_static_features\u001b[0m:\u001b[36mget_sparse_static_rep\u001b[0m:\u001b[36m69\u001b[0m - \u001b[1mMake static data sparse and merge it with the time-series data\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:41.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 
0:00:00.345097\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:41.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/none/static/present.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:41.916\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.522465\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:41.916\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/none/static/.present_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:41.918\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:35:41.918193. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:41.918\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:42.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:42.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_static_features\u001b[0m:\u001b[36mget_sparse_static_rep\u001b[0m:\u001b[36m69\u001b[0m - \u001b[1mMake static data sparse and merge it with the time-series data\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:42.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.206801\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:42.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/none/static/present.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:42.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.388196\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:42.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/none/static/.present_cache\u001b[0m\n", + 
"\u001b[32m2024-12-14 23:35:42.308\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:35:42.308698. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:42.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:42.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:42.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_static_features\u001b[0m:\u001b[36mget_sparse_static_rep\u001b[0m:\u001b[36m69\u001b[0m - \u001b[1mMake static data sparse and merge it with the time-series data\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:42.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.163699\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:42.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/none/static/present.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:42.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.335758\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:42.644\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/none/static/.present_cache\u001b[0m\n" ] } ], @@ -333,7 +393,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -346,11 +406,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "[2024-12-14 19:29:41,824][HYDRA] Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 2 jobs\n", - "[2024-12-14 19:29:41,824][HYDRA] Launching jobs, sweep output dir : /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.logs\n", - "[2024-12-14 19:29:41,824][HYDRA] \t#0 : worker=0 input_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]\n", - "[2024-12-14 19:29:41,824][HYDRA] \t#1 : worker=1 input_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/joblib/externals/loky/backend/fork_exec.py:38: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "[2024-12-14 23:35:44,566][HYDRA] 
Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 2 jobs\n", + "[2024-12-14 23:35:44,566][HYDRA] Launching jobs, sweep output dir : /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/.logs\n", + "[2024-12-14 23:35:44,566][HYDRA] \t#0 : worker=0 input_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]\n", + "[2024-12-14 23:35:44,566][HYDRA] \t#1 : worker=1 input_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/joblib/externals/loky/backend/fork_exec.py:38: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", "In addition, using fork() with Python in general is a recipe for mysterious\n", "deadlocks and crashes.\n", "\n", @@ -364,14 +424,14 @@ "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", "\n", " pid = os.fork()\n", - "\u001b[32m2024-12-14 19:29:44.342\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main function in __main__ module. 
Using sys.argv[0] as a fallback.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:44.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-tabularize-time-series with the following configuration:\n", - "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "\u001b[32m2024-12-14 23:35:45.887\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main function in __main__ module. Using sys.argv[0] as a fallback.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:45.888\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-tabularize-time-series with the following configuration:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", "do_overwrite: false\n", "seed: 1\n", "tqdm: false\n", - "worker: 0\n", + "worker: 1\n", "loguru_init: false\n", "log_dir: ${output_dir}/.logs/\n", "cache_dir: ${output_dir}/.cache\n", @@ -400,10 +460,9 @@ "output_tabularized_dir: ${output_dir}/tabularize\n", "name: tabularization\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:44.343\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main function in __main__ module. 
Using sys.argv[0] as a fallback.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:44.344\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-tabularize-time-series with the following configuration:\n", - "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "\u001b[32m2024-12-14 23:35:45.890\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", "do_overwrite: false\n", "seed: 1\n", "tqdm: false\n", @@ -435,10 +494,17 @@ "input_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", "output_tabularized_dir: ${output_dir}/tabularize\n", "name: tabularization\n", - "\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:44.345\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", - "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_code_metadata_fp: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - tabularization.filtered_code_metadata_fp: ✅ 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if not df.columns == [\"code\", \"count\"]:\n", + "\u001b[32m2024-12-14 23:35:45.904\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main function in __main__ module. Using sys.argv[0] as a fallback.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:45.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-tabularize-time-series with the following configuration:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", "do_overwrite: false\n", "seed: 1\n", "tqdm: false\n", @@ -470,18 +536,14 @@ "input_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", "output_tabularized_dir: ${output_dir}/tabularize\n", "name: tabularization\n", - "\n", - "Paths: (checkbox indicates if it exists)\n", - " - input_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", - " - tabularization.filtered_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:44.346\u001b[0m | 
\u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", - "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:45.908\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", "do_overwrite: false\n", "seed: 1\n", "tqdm: false\n", - "worker: 1\n", + "worker: 0\n", "loguru_init: false\n", "log_dir: ${output_dir}/.logs/\n", "cache_dir: ${output_dir}/.cache\n", @@ -511,769 +573,767 @@ "name: tabularization\n", "\n", "Paths: (checkbox indicates if it exists)\n", - " - input_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", - " - tabularization.filtered_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. 
Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", - " if not df.columns == [\"code\", \"count\"]:\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " - input_code_metadata_fp: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - tabularization.filtered_code_metadata_fp: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", " if not df.columns == [\"code\", \"count\"]:\n", - "\u001b[32m2024-12-14 19:29:44.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:44.368445. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:44.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:44.368478. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:44.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:44.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:44.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:44.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:44.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:44.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:44.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 365d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 
19:29:44.680\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:44.770\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 365d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:44.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:46.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:46.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:47.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:47.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.290497\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:47.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:47.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.451847\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:47.820\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:47.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:47.821471. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:47.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:47.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:47.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:47.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating 
Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:47.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:48.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 365d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:48.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:49.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:49.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:50.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:50.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.691439\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:50.657\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:50.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.851850\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:50.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:50.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:50.674418. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:50.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:50.813\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:50.818\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:51.035\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 30d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:51.063\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:51.319\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:51.319\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:06.808234\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:51.319\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:51.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:06.967497\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:51.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:51.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:51.337319. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:51.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:51.475\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:51.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:51.539\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 365d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:51.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:51.892\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:51.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:52.264\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:52.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.789179\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:52.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:52.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.940558\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:52.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:52.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:52.279073. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:52.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:52.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:52.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:52.538\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 30d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:52.557\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:53.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:53.781\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.038\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.619791\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.786003\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered 
lock at 2024-12-14 19:29:55.066239. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.205\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.210\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.266\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 30d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.618\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.621\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 
19:29:55.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.793007\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:55.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:56.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.947830\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:56.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:56.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:56.015070. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:56.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:56.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:56.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:56.221\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 365d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:56.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:57.244\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:57.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:58.298\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:58.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.144824\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:58.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:58.313\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.298852\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:58.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:58.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:58.315084. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:58.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:58.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:58.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:58.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 1d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:58.680\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:59.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:59.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:08.286706\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:59.100\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:59.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.438738\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:59.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:59.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:29:59.114326. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:59.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:59.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:59.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:59.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 365d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:29:59.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:00.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:00.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:01.809\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:01.815\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:02.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:02.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.043633\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:02.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:02.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.196022\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:02.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:02.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - 
\u001b[1mRegistered lock at 2024-12-14 19:30:02.311480. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:02.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:02.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:02.456\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:02.678\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 30d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:02.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:04.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:04.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:06.518336\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:04.972\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:04.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:06.672445\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:04.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:04.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:04.988614. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:04.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:05.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:05.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:05.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 1d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:05.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:06.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:06.724\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:06.738\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:06.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:08.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:08.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.973982\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:08.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:08.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.125978\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:08.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:08.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered 
lock at 2024-12-14 19:30:08.115686. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:08.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:08.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:08.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:08.380\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 365d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:08.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:09.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:09.054\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 
19:30:09.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:09.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.484012\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:09.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:09.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.639095\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:09.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:09.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:09.755883. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:09.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:09.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:09.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:09.973\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 1d, with 7033 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:09.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:10.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:10.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:10.760\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:10.761\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:08.309484\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:10.761\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:10.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.463632\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:10.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:10.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:10.776228. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:10.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:10.916\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:10.921\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:10.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 365d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:10.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:11.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:11.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.299031\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:11.194\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:11.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.538430\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:11.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:11.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:11.295472. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:11.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:11.433\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:11.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:11.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 30d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:11.679\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:11.797\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:11.799\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:12.651\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:12.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.734256\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:12.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:12.666\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.889755\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:12.666\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:12.667\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:12.667033. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:12.667\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:12.808\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:12.813\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:12.918\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 365d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:12.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:14.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:14.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:14.851\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:14.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:16.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:16.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:04.017321\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:16.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:16.842\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.175854\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:16.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:16.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered 
lock at 2024-12-14 19:30:16.844065. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:16.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:16.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:16.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:17.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 1d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:17.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:17.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:17.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 
19:30:18.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:06.703274\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:06.855055\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:18.151696. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.294\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.423637\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.581304\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.425\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:18.426646. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.552\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 365d, with 7033 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.589\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.627\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 30d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:18.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:19.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:19.446\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:20.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:20.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.732474\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:20.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:20.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 
0:00:01.888907\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:20.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:20.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:20.316486. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:20.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:20.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:20.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:20.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 30d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:20.716\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and 
aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:20.717\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:20.729\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:22.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:22.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:23.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:23.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.002779\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:23.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:23.500\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.183946\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:23.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:23.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:23.501441. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:23.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:23.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:23.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:23.712\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 1d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:23.720\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:24.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:24.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:25.536\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:25.536\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:07.246122\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:25.538\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:25.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:25.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.122564\u001b[0m\n", - "\u001b[32m2024-12-14 
19:30:25.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:25.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.280840\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:25.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:25.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:25.783527. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:25.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:25.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:25.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:26.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 30d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:26.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:26.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.353698\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:26.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 
19:30:26.506\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:26.506679. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:26.507\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:26.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:26.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:26.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 1d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:26.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:26.726\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:26.732\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:27.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:27.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.478692\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:27.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:27.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.636762\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:27.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:27.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:27.421440. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:27.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:27.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:27.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:27.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 30d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:27.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:27.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:27.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:28.297\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:28.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:28.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:28.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.122697\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:28.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:28.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.281859\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:28.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:28.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered 
lock at 2024-12-14 19:30:28.789747. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:28.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:28.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:28.934\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:28.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:28.992\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.430692\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:28.992\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:29.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.585463\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:29.007\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:29.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:29.007931. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:29.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:29.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:29.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:29.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 1d, with 7033 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:29.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 1d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:29.223\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:29.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:30.040\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:30.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:30.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:30.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.715615\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:30.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:30.883\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.875855\u001b[0m\n", 
- "\u001b[32m2024-12-14 19:30:30.883\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:30.884\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:30.884794. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:30.885\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:31.024\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:31.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:31.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 365d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:31.141\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - 
"\u001b[32m2024-12-14 19:30:31.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:31.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:33.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:33.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:33.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:33.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:05.059885\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:33.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:34.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m 
- \u001b[1mSucceeded in 0:00:05.608663\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:34.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:34.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:34.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:34.400358. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:34.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:34.539\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:34.544\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:34.765\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 1d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:34.790\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:35.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:35.043\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:04.018792\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:35.044\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:35.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.175562\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:35.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:35.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - 
\u001b[1mRegistered lock at 2024-12-14 19:30:35.061625. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:35.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:35.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:35.205\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:35.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 30d, with 7033 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:35.283\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:35.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:35.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - 
"\u001b[32m2024-12-14 19:30:36.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:36.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.557473\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:36.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:36.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.896954\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:36.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:36.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:36.959721. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:36.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:37.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:37.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:37.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 30d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:37.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:37.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:37.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:38.139\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:38.142\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.052899\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.199030\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - 
\u001b[1mRegistered lock at 2024-12-14 19:30:39.159826. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.362\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 1d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.709\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.711\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - 
"\u001b[32m2024-12-14 19:30:39.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:05.313204\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:05.466418\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.866\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.867\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:39.868178. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:39.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:40.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:40.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:40.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:40.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.772471\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:40.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:40.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.920812\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:40.080\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:40.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:40.081722. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:40.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:40.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 1d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:40.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:40.219\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:40.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:40.449\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 365d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:40.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:42.034\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:42.037\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:43.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:43.064\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:43.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:43.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.945570\u001b[0m\n", - 
"\u001b[32m2024-12-14 19:30:43.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:43.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.096982\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:43.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:43.966\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/count.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:43.966\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:43.966572. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:43.966\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:44.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:44.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:44.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 30d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:44.232\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:45.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:45.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:45.759\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:05.539699\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:45.759\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:45.762\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:45.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:05.693115\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:45.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:45.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:45.775948. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:45.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:45.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:45.920\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:46.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 365d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:46.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:47.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:47.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.236261\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:47.342\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:47.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.390809\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:47.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:47.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/min.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:47.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/count.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:47.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:47.359\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/count.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:47.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:47.359625. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:47.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:47.499\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:47.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:47.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 1d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:47.741\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:50.127\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:50.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:51.727\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:51.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:54.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:54.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:08.350187\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:54.265\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:54.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.503259\u001b[0m\n", - 
"\u001b[32m2024-12-14 19:30:54.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:54.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/min.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:54.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:54.280636. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:54.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:54.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:54.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:54.537\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating 
aggregation value/sum_sqd for window_size 1d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:54.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:55.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:55.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:08.239307\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:55.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:55.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.395853\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:55.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:55.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/max.npz exists; reading directly 
and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:55.756\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/max.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:55.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m223\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum_sqd.npz is in progress as of 2024-12-14 19:30:54.280636. Returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:55.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:55.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:55.758022. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:55.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:55.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:55.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:55.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 365d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:55.967\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:56.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:56.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:56.986\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:56.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:57.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:57.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.251303\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:57.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:57.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.407902\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:57.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:57.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - 
\u001b[1mRegistered lock at 2024-12-14 19:30:57.689601. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:57.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:57.830\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:57.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:57.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 1d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:57.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:58.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:58.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.151356\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:58.050\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:58.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.311068\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:58.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:58.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:30:58.070419. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:58.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:58.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:58.228\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:58.365\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 30d, with 7033 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:58.379\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:59.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:59.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:59.952\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:30:59.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.010203\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.408739\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m223\u001b[0m - 
\u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/max.npz is in progress as of 2024-12-14 19:30:57.689601. Returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/code/count.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/min.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/min.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.482\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:01.482507. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 365d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - 
\u001b[1mCompleted step 0 in 0:00:04.074945\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.918\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.228736\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.918\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.919\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:01.919543. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:01.919\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:02.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:02.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:02.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 1d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:02.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:02.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:02.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:02.846\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:02.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.449459\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.600210\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.082\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - 
\u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/max.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:03.084505. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 1d, with 7033 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.526\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.465358\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.527\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.544\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.624964\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.544\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:03.545555. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.799\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 1d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:03.819\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:04.415\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:04.418\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:05.105\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:05.108\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:05.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:05.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.459236\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:05.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:05.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.750706\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:05.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:05.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered 
lock at 2024-12-14 19:31:05.836372. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:05.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:05.973\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:05.978\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 30d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.671017\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.358\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.825396\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/code/count.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.372\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.372\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.372\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/code/count.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:06.373154. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.509\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 30d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:06.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:08.046\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:08.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:09.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:09.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:10.026\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:10.026\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:04.052935\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:10.026\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:10.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.204687\u001b[0m\n", - 
"\u001b[32m2024-12-14 19:31:10.041\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:10.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:10.042142. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:10.042\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:10.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:10.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:10.381\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 1d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:10.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 
19:31:12.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:12.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:05.578928\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:12.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:12.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:05.727701\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:12.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:12.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:12.102019. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:12.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:12.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:12.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:12.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 30d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:12.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:13.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:13.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:14.333\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:14.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.096215\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:14.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:14.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:14.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:14.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.251713\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:14.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:14.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - 
\u001b[1mRegistered lock at 2024-12-14 19:31:14.354885. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:14.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:14.534\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:14.538\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:14.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 30d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:14.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:16.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:16.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - 
"\u001b[32m2024-12-14 19:31:18.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:08.373020\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.581\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.539738\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.583\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:18.583144. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.583\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.738\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:04.220569\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.419170\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.774\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/code/count.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/max.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.775\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/count.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/count.npz exists; reading directly and 
returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.777\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/code/count.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.777\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.777\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/max.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.779\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:18.778926. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.779\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.815\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 365d, with 7033 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.921\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:18.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:19.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 365d, with 1170 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:19.175\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 
19:31:19.380\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:19.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:20.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:20.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.609524\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:20.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:20.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.963247\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:20.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:20.547\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m223\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/min.npz is in progress as of 2024-12-14 19:31:18.778926. Returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:20.548\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:20.548180. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:20.548\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:20.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:20.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:20.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 30d, with 7033 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:20.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:23.062\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:23.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:23.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:23.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:06.312096\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot 
file\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:08.342520\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.500363\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:27.280528. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.423\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 365d, with 7033 columns.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:07.141059\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 
19:31:27.690\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/max.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.690\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/min.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/min.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/count.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m223\u001b[0m - 
\u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/code/count.npz is in progress as of 2024-12-14 19:31:27.280528. Returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/count.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:27.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:28.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:28.590\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:30.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:30.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.148404\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:30.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:31.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.766112\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:31.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:31.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/code/count.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:31.048\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/min.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:31.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/code/count.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:31.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/count.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:31.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/min.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:31.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:31.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/code/count.npz exists; reading directly and 
returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:31.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/max.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:31.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/count.npz exists; reading directly and returning.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:31.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/max.npz exists; reading directly and returning.\u001b[0m\n" + "\u001b[32m2024-12-14 23:35:45.917\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:35:45.917667. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:45.918\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:45.936\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:35:45.936733. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:45.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:46.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:46.111\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:46.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:46.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:46.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:46.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:46.410\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:46.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:48.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:48.603\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:49.557\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:49.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:51.051\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:51.051\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:04.940512\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:51.051\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:51.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:05.136241\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:51.073\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:51.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:35:51.075068. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:51.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:51.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:51.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:51.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:51.573\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:52.736\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:52.736\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:06.629604\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:52.737\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:52.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:06.840980\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:52.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:52.760\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:35:52.760265. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:52.760\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:52.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:52.935\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:53.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 30d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:53.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:53.916\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:53.939\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:54.165\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:54.183\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:56.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:56.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:05.631920\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:56.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:56.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:05.829474\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:56.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:56.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock 
at 2024-12-14 23:35:56.906229. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:56.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:57.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:57.089\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:57.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:57.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:58.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:58.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 
23:35:59.690\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:59.690\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.606291\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:59.690\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:59.712\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.806360\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:59.712\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:59.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:35:59.714247. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:59.714\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:59.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:35:59.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:00.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:00.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:07.207372\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:00.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:00.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 365d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:00.541\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:01.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.521991\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:01.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:01.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:36:01.284574. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:01.285\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:01.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:01.465\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:01.638\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:01.656\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:02.797\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:02.807\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:03.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:03.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:03.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:03.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.499409\u001b[0m\n", + 
"\u001b[32m2024-12-14 23:36:03.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:03.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.697452\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:03.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:03.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:36:03.983424. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:03.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:04.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:04.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:04.339\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:04.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:06.030\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:06.040\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:07.753\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:07.753\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.596856\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:07.753\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:07.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.791272\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:07.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:07.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:36:07.776112. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:07.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:07.951\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:07.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:08.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:08.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:11.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:11.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:11.264766\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:11.175\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:11.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:11.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:12.809\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:13.095592\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:12.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:12.813\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:36:12.813252. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:12.813\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:12.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:12.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:13.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 365d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:13.187\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:14.058\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:14.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:14.814\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:14.814\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:06.863368\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:14.815\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:14.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:07.059224\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:14.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:14.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:14.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:36:14.837376. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:14.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:15.034\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:15.040\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:15.498\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:15.577\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:15.633\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:15.634\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.644570\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:15.634\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:15.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.147157\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:15.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:15.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m223\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/max.npz is in progress as of 2024-12-14 23:36:14.837376. Returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:15.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:36:15.963548. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:15.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:16.132\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:16.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:16.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:16.319\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:17.656\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:17.664\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:19.080\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:19.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.948612\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:19.081\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:19.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.143142\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:19.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:19.108\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:36:19.108420. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:19.108\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:19.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:19.292\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:19.467\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 1d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:19.493\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:20.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:20.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:21.328\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:21.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.041967\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:21.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:21.543\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.435003\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:21.543\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:21.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:36:21.544979. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:21.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:21.716\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:21.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:22.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:22.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:22.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:22.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:27.605\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:27.664\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:28.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:28.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:13.856255\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:28.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:28.916\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:14.078731\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:28.916\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:28.918\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 
2024-12-14 23:36:28.917895. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:28.918\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:29.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:29.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:29.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:29.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:30.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:30.433\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 
23:36:31.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:31.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.359601\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:31.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:31.475\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.557925\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:31.475\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:31.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:36:31.477201. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:31.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:31.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:31.654\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:31.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:31.937\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:33.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:33.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:12.006070\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:33.723\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:33.752\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:12.207090\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:33.752\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:33.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:36:33.753800. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:33.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:33.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:33.935\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:34.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:34.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:34.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:34.215\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:36.182\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:36.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:04.533419\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:36.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:36.203\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.726580\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:36.203\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:36.205\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:36:36.205348. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:36.205\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:36.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:36.383\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:36.535\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:36.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:37.532\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:37.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:38.235\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:38.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:39.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:39.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.586602\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:39.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:39.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.782719\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:39.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:39.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 
2024-12-14 23:36:39.989649. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:39.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:40.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:40.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:40.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:40.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:40.973\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:40.973\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:07.043599\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:40.973\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:40.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:07.242631\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:40.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:40.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:36:40.997996. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:40.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:41.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:41.180\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:41.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:41.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:41.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:41.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:43.035\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:43.060\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:43.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:43.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.987901\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:43.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:43.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.181422\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:43.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:43.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - 
\u001b[1mRegistered lock at 2024-12-14 23:36:43.172470. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:43.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:43.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:43.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:43.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:43.813\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:44.872\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:44.873\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.698286\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:44.873\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:44.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.900040\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:44.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:44.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:36:44.899642. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:44.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:45.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:45.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:45.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 1d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:45.440\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:46.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:46.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:47.937\u001b[0m | \u001b[1mINFO \u001b[0m 
| \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:47.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:48.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:48.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.859035\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:48.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:49.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.404006\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:49.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:49.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 
23:36:49.305467. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:49.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:49.502\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:49.508\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:49.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 365d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:49.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:51.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:51.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:52.225\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:52.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:08.878504\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:52.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:52.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:09.074093\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:52.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:52.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:36:52.248119. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:52.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:52.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:52.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:52.729\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:52.799\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:54.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:54.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:04.910930\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:54.413\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:55.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:05.825221\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:55.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:55.133\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:36:55.133392. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:55.133\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:55.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:55.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:55.351\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:55.387\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:55.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:55.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:58.069\u001b[0m | \u001b[1mINFO \u001b[0m 
| \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:58.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:05.639175\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:58.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:58.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:05.845164\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:58.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:58.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:36:58.094850. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:58.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:58.268\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:58.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:58.678\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:36:58.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:00.434\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:00.499\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:05.044\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:05.044\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:09.739288\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:05.044\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:05.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:09.931825\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:05.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:05.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:37:05.066773. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:05.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:05.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:05.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:05.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:05.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:05.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:05.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:06.041\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:06.049\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:06.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:06.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.399770\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:06.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:06.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.629349\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:06.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:06.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock 
at 2024-12-14 23:37:06.697735. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:06.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:06.875\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:06.881\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:06.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:07.015\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:08.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:08.864\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 
23:37:10.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:10.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.859595\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:10.735\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:10.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.060162\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:10.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:10.759\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:37:10.759687. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:10.760\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:10.935\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:10.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:11.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:11.474\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:11.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:11.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:13.656171\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:11.925\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:11.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:13.850176\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:11.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:11.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:37:11.946554. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:11.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:12.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:12.163\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:12.590\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:12.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:18.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:18.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:18.995\u001b[0m | \u001b[1mINFO \u001b[0m 
| \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:19.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:14.139969\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:14.344267\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.104\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 
23:37:25.105754. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.283\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:13.500279\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.658\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.679\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:13.732931\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.679\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:37:25.680958. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.871\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:25.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:26.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:26.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:27.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:27.180\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:28.192\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:28.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.321112\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:28.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:28.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:02.532931\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:28.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:28.215\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/code/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:28.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:37:28.215930. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:28.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:28.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:28.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:28.882\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 30d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:28.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:28.912\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:28.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:31.967\u001b[0m | \u001b[1mINFO \u001b[0m 
| \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:32.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:32.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:32.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:06.897986\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:32.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:32.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:07.094834\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:32.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:32.202\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 
23:37:32.202215. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:32.202\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:32.375\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:32.380\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:32.618\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:32.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:34.815\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:34.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:36.953\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:36.954\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:04.578813\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:36.954\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:36.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.773224\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:36.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:36.976\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:36.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", + 
"\u001b[32m2024-12-14 23:37:36.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:36.978\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:37:36.978394. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:36.978\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:37.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:37.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:37.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:37.426\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 
23:37:40.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:40.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:44.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:44.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:07.152251\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:44.303\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:44.331\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:07.353202\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:44.331\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:44.346\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:44.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:37:44.347922. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:44.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:44.528\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:44.534\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:44.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:44.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:45.290\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:45.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:45.892\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:45.892\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.364243\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:45.892\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:45.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.565790\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:45.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:45.914\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - 
\u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/max.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:45.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:37:45.915650. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:45.916\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:46.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:46.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:46.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/min for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:46.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:48.009\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:48.033\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:49.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:49.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.837372\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:49.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:49.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.034930\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:49.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:49.952\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 
23:37:49.952175. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:49.952\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:50.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:50.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:50.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:50.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:54.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:55.005\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:59.493\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:59.493\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:09.367494\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:59.493\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:59.515\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:09.563392\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:59.515\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:59.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:37:59.516977. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:59.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:59.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:37:59.699\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:00.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:00.132\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:02.200\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:02.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:04.559\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:04.559\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:04.865373\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:04.559\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:04.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:05.068211\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:04.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:04.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:38:04.586943. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:04.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:04.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:04.763\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:05.186\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 1d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:05.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:08.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:08.672\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:12.560\u001b[0m | \u001b[1mINFO \u001b[0m 
| \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:12.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:07.802511\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:12.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:13.270\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.683355\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:13.270\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:13.272\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:38:13.272684. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:13.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:13.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:13.450\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:13.881\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:13.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:16.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:16.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:48.293227\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:16.681\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:20.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:21.022\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:28.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:28.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:14.725988\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:28.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:28.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:14.920565\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:28.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:28.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m223\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/code/count.npz is in progress as of 2024-12-14 23:37:28.215930. Returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:28.195\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:38:28.195569. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:28.195\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:28.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:28.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:28.752\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:28.818\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:30.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:30.934\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:33.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:33.079\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:04.710689\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:33.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:33.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.907889\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:33.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:33.104\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/max.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:33.105\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/code/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:33.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:38:33.106063. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:33.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:33.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:33.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:33.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:33.787\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:35.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:35.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:38.072\u001b[0m | \u001b[1mINFO \u001b[0m 
| \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:38.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:04.791070\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:38.072\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:38.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.989776\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:38.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:38.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:38:38.097366. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:38.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:38.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:38.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:38.709\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:38.778\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:01:12.792611\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 
23:38:41.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:38:41.013714. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.260\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:41.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:43.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:43.157\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:44.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:44.491\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.238070\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:44.491\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:44.539\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.526197\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:44.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:44.541\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:38:44.541737. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:44.542\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:44.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:44.741\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:44.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:45.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:46.522\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:46.541\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:47.975\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:48.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:51.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:51.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:06.389568\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:51.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:51.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:06.607538\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:51.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:51.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - 
\u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:51.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/code/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:51.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:51.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:51.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/code/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:51.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:38:51.153823. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:51.154\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:51.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:51.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:51.502\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:51.535\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:53.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:53.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:53.923\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:53.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:15.624057\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:53.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:53.954\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:15.857346\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:53.954\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:53.956\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m223\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum_sqd.npz is in progress as of 2024-12-14 23:38:51.153823. 
Returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:53.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:53.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:53.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:53.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:53.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:53.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - 
\u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/max.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:53.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:38:53.960132. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:53.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:54.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:54.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:54.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:54.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:54.856\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:54.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:03.519015\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:54.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:54.889\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.735879\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:54.889\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:54.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:38:54.891516. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:54.892\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:55.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:55.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:55.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:55.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:59.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:38:59.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:01.096\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:01.191\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:03.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:03.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:08.134201\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:03.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:03.292\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.401235\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:03.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:03.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 
2024-12-14 23:39:03.296932. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:03.297\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:03.594\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:03.603\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:03.729\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/count for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:03.755\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:04.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:04.460\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 
23:39:05.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:05.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:01.475806\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:05.070\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:05.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.800463\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:05.097\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:05.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:05.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:05.099902. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:05.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:05.331\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:05.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:05.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation code/count for window_size 30d, with 7033 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:05.651\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:07.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:07.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:08.069\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:08.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:13.931749\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:08.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:08.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:14.142205\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:08.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:08.105\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:08.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m223\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/code/count.npz is in progress as of 2024-12-14 23:39:05.099902. 
Returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:08.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:08.107135. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:08.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:08.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:08.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:08.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum for window_size 30d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:08.493\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:09.734\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:09.755\u001b[0m | \u001b[1mINFO \u001b[0m 
| \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:05.805817\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:02.985521\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:03.207556\u001b[0m\n", + 
"\u001b[32m2024-12-14 23:39:11.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:11.316329. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:06.802081\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 
23:39:11.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/max.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/code/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/max.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.906\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/code/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/count.npz exists; reading 
directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - 
\u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/max.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.911\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.912\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.912\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.913\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:11.913227. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:11.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/sum_sqd for window_size 1d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:12.025\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:12.132\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:12.140\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_ts_features\u001b[0m:\u001b[36msummarize_dynamic_measurements\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mGenerating Sparse matrix for Time Series Features\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:12.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mgenerate_summary\u001b[0m:\u001b[36m206\u001b[0m - \u001b[1mGenerating aggregation value/max for window_size 365d, with 1170 columns.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:12.343\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mStep 1.5: Running sparse aggregation.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:14.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:14.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:16.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:16.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:04.520141\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:16.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:16.683\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:04.770536\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:16.684\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:16.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:16.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum_sqd.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:16.686\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/code/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:16.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m223\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum_sqd.npz is in progress as of 2024-12-14 23:39:11.316329. 
Returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:16.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:16.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:18.849\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mStep 2: computing rolling windows and aggregating.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:18.868\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.generate_summarized_reps\u001b[0m:\u001b[36mcompute_agg\u001b[0m:\u001b[36m162\u001b[0m - \u001b[1mStarting final sparse aggregations.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:24.825\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.tabularize_time_series\u001b[0m:\u001b[36mcompute_fn\u001b[0m:\u001b[36m121\u001b[0m - \u001b[1mWriting pivot file\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:24.825\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:13.305698\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:24.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:24.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:13.536404\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:24.852\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:24.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/count.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:24.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/max.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:24.855\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/min.npz exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:24.855\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum.npz 
exists; reading directly and returning.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:24.856\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m212\u001b[0m - \u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/max.npz exists; reading directly and returning.\u001b[0m\n" ] } ], @@ -1283,7 +1343,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 8, "metadata": { "id": "NmaR_-Fik4eH" }, @@ -1292,9 +1352,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m2024-12-14 19:31:32.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning cache_task with the following configuration:\n", - "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "\u001b[32m2024-12-14 23:39:26.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning cache_task with the following configuration:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", "do_overwrite: false\n", "seed: 1\n", "tqdm: false\n", @@ -1325,15 +1385,15 @@ " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", "task_name: mortality/in_icu/first_24h\n", "input_tabularized_dir: ${output_dir}/tabularize\n", - "input_label_dir: 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//task_labels/mortality/in_icu/first_24h/\n", + "input_label_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//task_labels/mortality/in_icu/first_24h/\n", "output_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", "output_label_cache_dir: ${output_dir}/${task_name}/labels\n", "label_column: boolean_value\n", "name: task_specific_caching\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:32.693\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", - "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "\u001b[32m2024-12-14 23:39:26.963\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", "do_overwrite: false\n", "seed: 1\n", "tqdm: false\n", @@ -1364,478 +1424,478 @@ " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", "task_name: mortality/in_icu/first_24h\n", "input_tabularized_dir: ${output_dir}/tabularize\n", - "input_label_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//task_labels/mortality/in_icu/first_24h/\n", + "input_label_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//task_labels/mortality/in_icu/first_24h/\n", 
"output_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", "output_label_cache_dir: ${output_dir}/${task_name}/labels\n", "label_column: boolean_value\n", "name: task_specific_caching\n", "\n", "Paths: (checkbox indicates if it exists)\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", - " - input_label_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h\n", - " - input_tabularized_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize\n", - " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", - " - tabularization.filtered_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. 
Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - input_label_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h\n", + " - input_tabularized_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", + " - tabularization.filtered_code_metadata_fp: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", " if not df.columns == [\"code\", \"count\"]:\n", - "\u001b[32m2024-12-14 19:31:32.718\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:32.718444. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:32.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/max.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:32.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mExtracting labels for /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/MEDS_tabular_automl/scripts/cache_task.py:138: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + "\u001b[32m2024-12-14 23:39:26.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:26.993553. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:26.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:26.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mExtracting labels for /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/scripts/cache_task.py:138: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. 
Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", " if \"numeric_value\" not in pl.scan_parquet(meds_data_fp).columns:\n", - "\u001b[32m2024-12-14 19:31:33.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.004008\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.403557\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.122\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.123828. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mExtracting labels for /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000829\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.189162\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.313\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.314377. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/max.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000467\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.315\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001786\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.317717. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mExtracting labels for /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000750\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.281862\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.599\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.600910. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum_sqd.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000506\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.602\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001858\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.604087. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/max.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.605\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000514\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.605\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.605\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001834\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.605\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.607287. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000479\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.608\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001616\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.610091. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/none/static/present.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.611\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000564\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/none/static/present.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002447\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.612\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/none/static/.present_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.613706. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/code/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.803\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.896\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.092937\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.896\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.283911\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.899028. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/code/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.933\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.024107\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.059743\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.958\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:33.960194. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/code/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:33.960\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.096358\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.240\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.280730\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.242218. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000614\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.244\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.244\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002073\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.244\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.245544. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000766\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.247\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002275\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.249117. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum_sqd.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000515\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.251\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001884\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.251\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.252423. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/min.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000494\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.253\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.254\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001767\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.254\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.255\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.255409. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.255\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.255\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000610\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001738\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.257\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.258242. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/max.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.258\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000440\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.259\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001533\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.261017. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/code/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.051062\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.429\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.169042\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.430\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.431334. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/max.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.433\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000719\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.433\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.433\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002271\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.433\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.435039. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum_sqd.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000529\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.436\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001961\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.437\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.438258. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000549\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.439\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.440\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001836\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.440\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.441675. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/min.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.441\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000480\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001698\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.443\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.444502. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000432\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.445\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001475\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.446\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.447331. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000560\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002037\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.449\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.450\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.450709. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum_sqd.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000461\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.452\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001706\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.453614. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum_sqd.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000431\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001619\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.455\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.456\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.456354. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.456\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/min.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.456\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000500\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.457\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001761\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.476393. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000694\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002162\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.478\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.479821. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum_sqd.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.480\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000443\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.481\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001582\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.482562. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/min.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000450\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001509\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.484\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.485213. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000747\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.487\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002087\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.488491. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/code/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.570\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.039900\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.611\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.123018\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.611\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.612903. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000530\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.614\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001848\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.616112. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum_sqd.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.616\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000553\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001867\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.618\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.619\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.619236. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.619\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/min.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.619\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.619\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.620\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000449\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.620\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.620\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001561\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.620\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.621973. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum_sqd.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.622\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000444\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001567\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.623\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.624635. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000472\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.625\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001622\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.627487. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/max.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000565\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001945\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.629\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:34.631092. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/code/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:34.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.132318\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.108\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.478014\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.110579. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/max.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.111\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.111\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000835\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002405\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.113\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.114273. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/min.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000872\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.116\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002747\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.118304. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum_sqd.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000667\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/sum_sqd.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.120\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001921\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.120\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/.sum_sqd_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.121639. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/none/static/present.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001168\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.125\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/none/static/present.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.004340\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/none/static/.present_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.127226. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/min.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000535\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001667\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.128\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.130180. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/max.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000638\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.131\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.132\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001985\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.132\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.133\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.133366. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.133\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.133\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000595\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001750\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.135\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.136312. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/min.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000587\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.137\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001763\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.138\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.139237. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.140\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.140\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000552\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.140\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.141\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001845\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.141\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.142\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.142795. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/none/static/present.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.145\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000699\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.145\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/none/static/present.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003298\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/none/static/.present_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.147\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.147754. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/code/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.739\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.145727\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.739\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.740\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.593151\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.740\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.742465. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/code/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.742\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.840\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.030237\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.840\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.099197\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.843082. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/code/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.045663\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/code/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.978\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.135366\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.978\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/code/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.979866. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/min.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000733\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.981\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/min.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002257\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/.min_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.983223. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/max.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000639\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/max.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002292\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.985\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/.max_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.986869. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.987\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000508\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.988\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/sum.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001896\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/.sum_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.989928. 
Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.990\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000468\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001743\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.991\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/.count_cache\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 19:31:35.992953. Double checking no earlier locks have been registered.\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/count.npz'))\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000601\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.994\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/count.npz\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.001857\u001b[0m\n", - "\u001b[32m2024-12-14 19:31:35.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/.count_cache\u001b[0m\n" + "\u001b[32m2024-12-14 23:39:27.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:27.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.002343\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:27.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:27.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.345167\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:27.338\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:27.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:27.342114. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:27.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:27.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mExtracting labels for /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:27.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:27.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001631\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:27.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final 
output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:27.838\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.496617\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:27.838\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:27.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:27.841166. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:27.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:27.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1mExtracting labels for /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.220\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - 
\u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001386\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.382032\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:28.225263. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000975\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.228\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.228\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003305\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.228\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:28.230745. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001024\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002947\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:28.235757. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.305\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.389\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.083393\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.389\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.390\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.155225\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:28.393217. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001306\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003403\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.396\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:28.398520. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.399\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.400\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001155\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002950\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:28.403343. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:28.936\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.289836\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.824596\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.228\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:29.230391. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.230\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001886\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.233\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003879\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.234\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:29.236283. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001484\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003431\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:29.243697. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.244\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.244\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.245\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001482\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003553\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:29.249181. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:29.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:30.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:07.219049\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.047\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:08.803199\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.052\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:38.060894. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.063\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.063\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.002469\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.069\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.075\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.014771\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:38.082951. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/none/static/present.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.084\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.087\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.002698\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/none/static/present.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.015064\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/none/static/.present_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:38.105527. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.002776\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.008050\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.113\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:38.129700. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.132\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.002690\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.135\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.007695\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:38.143204. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.144\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.146\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.003418\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.008666\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:38.159340. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.446\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.177140\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.625\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.465708\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.625\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:38.626971. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.628\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001391\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.630\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003174\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.630\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:38.631988. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/none/static/present.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.633\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001785\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/none/static/present.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.004460\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/none/static/.present_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:38.639065. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001102\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002739\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.641\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:38.643611. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.000925\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002595\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.648\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:38.647962. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.648\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.648\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.834\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.059569\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.249099\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:38.902941. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.002128\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.006845\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.910\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.915\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:38.915177. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.916\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.916\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.917\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.919\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001687\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.919\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.920\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.004850\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.920\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.923\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:38.923040. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.923\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.923\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.924\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001346\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003959\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.927\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:38.972017. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:38.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.325\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.873\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.548147\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.874\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.877\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:01.905807\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.877\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.883\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:40.882903. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.883\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.884\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.885\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001642\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.888\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.005157\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.888\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:40.890371. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/none/static/present.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.891\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.892\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001982\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/none/static/present.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.004542\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/none/static/.present_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:40.896869. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001231\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003179\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.900\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:40.901904. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.902\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001422\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003453\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.905\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:40.907496. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.907\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:40.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.156\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.268\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.112351\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.270\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.362816\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.270\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.272\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.272574. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001615\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.276\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003864\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.276\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.278357. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.279\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001471\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003590\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.283858. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.285\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001656\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003683\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.289\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.289534. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.292\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001557\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.292\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003593\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.293\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.295009. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.002391\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.005415\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.306290. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.308\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001779\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.005244\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.314935. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.315\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001311\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.318\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003727\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.318\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.320510. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001251\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003103\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.325\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.325359. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.325\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/30d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.325\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.326\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001410\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003351\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/30d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.330514. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/30d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.331\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.331\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001221\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003044\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.333\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/30d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.335482. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001369\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003274\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.340848. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.342\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001194\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003024\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.345767. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/365d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.126122\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.399807\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.745\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/365d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.748\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.747815. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.748\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.748\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001731\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003927\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.751\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.753\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.753707. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/tuning/0/1d/code/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/tuning/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.896\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.061608\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.958\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/code/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.205862\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.959\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/tuning/0/1d/code/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.961\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.961564. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/30d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.962\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001438\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003619\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.965\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/30d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.967\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.967066. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.967\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.967\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.968\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001480\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003384\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.972395. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/sum.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.972\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.973\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001221\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/sum.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.003004\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.975\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/.sum_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.977139. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/1d/value/count.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.977\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.978\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001151\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/count.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002931\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.980\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/1d/value/.count_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.981732. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001168\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002924\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/.min_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.986356. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/365d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.987\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001184\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002876\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.989\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/365d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.990926. 
Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/train/0/365d/value/sum_sqd.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.991\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/train/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.992\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001179\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/sum_sqd.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.993\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002904\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.993\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/train/0/365d/value/.sum_sqd_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:41.995565. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.995\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/max.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001131\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.997\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - 
\u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/max.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002819\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:41.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/.max_cache\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:42.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m228\u001b[0m - \u001b[1mRegistered lock at 2024-12-14 23:39:42.000189. Double checking no earlier locks have been registered.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:42.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m235\u001b[0m - \u001b[1mReading input dataframe from (PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet'), PosixPath('/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/tabularize/held_out/0/1d/value/min.npz'))\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:42.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.cache_task\u001b[0m:\u001b[36mread_fn\u001b[0m:\u001b[36m166\u001b[0m - \u001b[1mLabels already exist, reading from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels/held_out/0.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:42.001\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m237\u001b[0m - \u001b[1mRead dataset\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:42.002\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m259\u001b[0m - \u001b[1mCompleted step 0 in 0:00:00.001223\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:42.002\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m261\u001b[0m - \u001b[1mWriting final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/min.npz\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:42.003\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mSucceeded in 0:00:00.002894\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:42.003\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.mapper\u001b[0m:\u001b[36mwrap\u001b[0m:\u001b[36m265\u001b[0m - \u001b[1mClearing cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/task_cache/held_out/0/1d/value/.min_cache\u001b[0m\n" ] } ], @@ -1845,7 +1905,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "id": "dLIkOzTblBB2" }, @@ -1854,16 +1914,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m[I 2024-12-14 19:36:45,225]\u001b[0m A new study created in memory with name: no-name-12520edb-1d03-4285-8296-9a50cda0c57f\u001b[0m\n", - "[2024-12-14 19:36:45,225][HYDRA] Study name: no-name-12520edb-1d03-4285-8296-9a50cda0c57f\n", - "[2024-12-14 19:36:45,225][HYDRA] Storage: None\n", - "[2024-12-14 19:36:45,225][HYDRA] Sampler: TPESampler\n", - "[2024-12-14 19:36:45,225][HYDRA] Directions: ['maximize']\n", - "[2024-12-14 19:36:45,228][HYDRA] 
Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 2 jobs\n", - "[2024-12-14 19:36:45,228][HYDRA] Launching jobs, sweep output dir : /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_19-36-44/hydra\n", - "[2024-12-14 19:36:45,228][HYDRA] \t#0 : +model_launcher.model.eta=0.3778393076431474 +model_launcher.model.lambda=0.02025881019876421 +model_launcher.model.alpha=0.0016680628084537149 +model_launcher.model.subsample=0.6402928897499296 +model_launcher.model.min_child_weight=72.18555499106307 +model_launcher.model.max_depth=13 model_launcher.training_params.num_boost_round=258 model_launcher.training_params.early_stopping_rounds=5 input_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ output_model_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/ task_name=mortality/in_icu/first_24h do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max] tabularization.min_code_inclusion_count=10\n", - "[2024-12-14 19:36:45,228][HYDRA] \t#1 : +model_launcher.model.eta=0.3174642123234027 +model_launcher.model.lambda=0.1822517129021365 +model_launcher.model.alpha=0.0034557035933266635 +model_launcher.model.subsample=0.6267376438851018 +model_launcher.model.min_child_weight=19.197298182330886 +model_launcher.model.max_depth=13 model_launcher.training_params.num_boost_round=709 model_launcher.training_params.early_stopping_rounds=3 input_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data 
output_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ output_model_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/ task_name=mortality/in_icu/first_24h do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max] tabularization.min_code_inclusion_count=10\n", - "/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/joblib/externals/loky/backend/fork_exec.py:38: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "\u001b[32m[I 2024-12-14 23:39:44,970]\u001b[0m A new study created in memory with name: no-name-caa18645-481d-4a43-b303-eeb4485e2bbb\u001b[0m\n", + "[2024-12-14 23:39:44,971][HYDRA] Study name: no-name-caa18645-481d-4a43-b303-eeb4485e2bbb\n", + "[2024-12-14 23:39:44,971][HYDRA] Storage: None\n", + "[2024-12-14 23:39:44,971][HYDRA] Sampler: TPESampler\n", + "[2024-12-14 23:39:44,971][HYDRA] Directions: ['maximize']\n", + "[2024-12-14 23:39:44,974][HYDRA] Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 2 jobs\n", + "[2024-12-14 23:39:44,974][HYDRA] Launching jobs, sweep output dir : /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/hydra\n", + "[2024-12-14 23:39:44,974][HYDRA] \t#0 : +model_launcher.model.eta=0.22037365138299875 +model_launcher.model.lambda=0.06876318864824583 +model_launcher.model.alpha=0.1407563256868113 +model_launcher.model.subsample=0.9905504065650356 +model_launcher.model.min_child_weight=80.66173743818563 +model_launcher.model.max_depth=15 model_launcher.training_params.num_boost_round=984 model_launcher.training_params.early_stopping_rounds=7 
input_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ output_model_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/ task_name=mortality/in_icu/first_24h do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max] tabularization.min_code_inclusion_count=10\n", + "[2024-12-14 23:39:44,974][HYDRA] \t#1 : +model_launcher.model.eta=0.07281662229132216 +model_launcher.model.lambda=0.44578142602542953 +model_launcher.model.alpha=0.027022316884683026 +model_launcher.model.subsample=0.9102834274171552 +model_launcher.model.min_child_weight=53.387421633372924 +model_launcher.model.max_depth=15 model_launcher.training_params.num_boost_round=967 model_launcher.training_params.early_stopping_rounds=9 input_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ output_model_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/ task_name=mortality/in_icu/first_24h do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max] tabularization.min_code_inclusion_count=10\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/joblib/externals/loky/backend/fork_exec.py:38: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", "In addition, using fork() with Python in general is a recipe for mysterious\n", "deadlocks and crashes.\n", "\n", @@ -1877,10 +1937,10 @@ "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", "\n", " pid = os.fork()\n", - "\u001b[32m2024-12-14 19:36:46.927\u001b[0m | 
\u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main function in __main__ module. Using sys.argv[0] as a fallback.\u001b[0m\n", - "\u001b[32m2024-12-14 19:36:46.930\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-model with the following configuration:\n", - "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "\u001b[32m2024-12-14 23:39:46.210\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main function in __main__ module. Using sys.argv[0] as a fallback.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:46.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-model with the following configuration:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", "do_overwrite: false\n", "seed: 1\n", "tqdm: false\n", @@ -1939,27 +1999,26 @@ " nthread: 1\n", " tree_method: hist\n", " objective: binary:logistic\n", - " eta: 0.3778393076431474\n", - " lambda: 0.02025881019876421\n", - " alpha: 0.0016680628084537149\n", - " subsample: 0.6402928897499296\n", - " min_child_weight: 72.18555499106307\n", - " max_depth: 13\n", + " eta: 0.22037365138299875\n", + " lambda: 0.06876318864824583\n", + " alpha: 0.1407563256868113\n", + " subsample: 0.9905504065650356\n", + " min_child_weight: 80.66173743818563\n", + " max_depth: 
15\n", " training_params:\n", - " num_boost_round: 258\n", - " early_stopping_rounds: 5\n", + " num_boost_round: 984\n", + " early_stopping_rounds: 7\n", "task_name: mortality/in_icu/first_24h\n", "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", - "output_model_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", "delete_below_top_k: -1\n", "name: launch_model\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 19:36:46.930\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main function in __main__ module. Using sys.argv[0] as a fallback.\u001b[0m\n", - "\u001b[32m2024-12-14 19:36:46.933\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-model with the following configuration:\n", - "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "\u001b[32m2024-12-14 23:39:46.217\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", "do_overwrite: false\n", "seed: 1\n", "tqdm: false\n", @@ -2018,26 +2077,110 @@ " nthread: 1\n", " 
tree_method: hist\n", " objective: binary:logistic\n", - " eta: 0.3174642123234027\n", - " lambda: 0.1822517129021365\n", - " alpha: 0.0034557035933266635\n", - " subsample: 0.6267376438851018\n", - " min_child_weight: 19.197298182330886\n", - " max_depth: 13\n", + " eta: 0.22037365138299875\n", + " lambda: 0.06876318864824583\n", + " alpha: 0.1407563256868113\n", + " subsample: 0.9905504065650356\n", + " min_child_weight: 80.66173743818563\n", + " max_depth: 15\n", " training_params:\n", - " num_boost_round: 709\n", - " early_stopping_rounds: 3\n", + " num_boost_round: 984\n", + " early_stopping_rounds: 7\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - input_label_cache_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", + " - tabularization.filtered_code_metadata_fp: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:46.233\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main function in __main__ module. 
Using sys.argv[0] as a fallback.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:46.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-model with the following configuration:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: null\n", + 
"data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.07281662229132216\n", + " lambda: 0.44578142602542953\n", + " alpha: 0.027022316884683026\n", + " subsample: 0.9102834274171552\n", + " min_child_weight: 53.387421633372924\n", + " max_depth: 15\n", + " training_params:\n", + " num_boost_round: 967\n", + " early_stopping_rounds: 9\n", "task_name: mortality/in_icu/first_24h\n", "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", - "output_model_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", "delete_below_top_k: -1\n", "name: launch_model\n", "\u001b[0m\n", - "\u001b[32m2024-12-14 19:36:46.933\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", - "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "\u001b[32m2024-12-14 23:39:46.241\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - 
\u001b[34m\u001b[1mStage config:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", "do_overwrite: false\n", "seed: 1\n", "tqdm: false\n", @@ -2096,31 +2239,121 @@ " nthread: 1\n", " tree_method: hist\n", " objective: binary:logistic\n", - " eta: 0.3778393076431474\n", - " lambda: 0.02025881019876421\n", - " alpha: 0.0016680628084537149\n", - " subsample: 0.6402928897499296\n", - " min_child_weight: 72.18555499106307\n", - " max_depth: 13\n", + " eta: 0.07281662229132216\n", + " lambda: 0.44578142602542953\n", + " alpha: 0.027022316884683026\n", + " subsample: 0.9102834274171552\n", + " min_child_weight: 53.387421633372924\n", + " max_depth: 15\n", " training_params:\n", - " num_boost_round: 258\n", - " early_stopping_rounds: 5\n", + " num_boost_round: 967\n", + " early_stopping_rounds: 9\n", "task_name: mortality/in_icu/first_24h\n", "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", - "output_model_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", "delete_below_top_k: -1\n", "name: launch_model\n", "\n", "Paths: (checkbox indicates if it exists)\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", - " - input_label_cache_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels\n", - " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", - " - tabularization.filtered_code_metadata_fp: ✅ 
/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", - "\u001b[32m2024-12-14 19:36:46.937\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", - "input_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", - "output_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - input_label_cache_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", + " - tabularization.filtered_code_metadata_fp: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if not df.columns == [\"code\", \"count\"]:\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. 
Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if not df.columns == [\"code\", \"count\"]:\n", + "\u001b[32m2024-12-14 23:39:48.629\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.launch_model\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m70\u001b[0m - \u001b[34m\u001b[1mModel config and performance logged to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/4399271614519315579/config.log and /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/4399271614519315579/performance.log\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:48.643\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.launch_model\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m70\u001b[0m - \u001b[34m\u001b[1mModel config and performance logged to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/7990023231025888822/config.log and /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/7990023231025888822/performance.log\u001b[0m\n", + "[2024-12-14 23:39:48,800][HYDRA] Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 2 jobs\n", + "[2024-12-14 23:39:48,800][HYDRA] Launching jobs, sweep output dir : /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/hydra\n", + "[2024-12-14 23:39:48,800][HYDRA] \t#0 : +model_launcher.model.eta=0.01142217933288686 +model_launcher.model.lambda=0.003637888220850399 +model_launcher.model.alpha=0.4118676342582917 
+model_launcher.model.subsample=0.8132771225539538 +model_launcher.model.min_child_weight=42.3261225166682 +model_launcher.model.max_depth=13 model_launcher.training_params.num_boost_round=564 model_launcher.training_params.early_stopping_rounds=1 input_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ output_model_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/ task_name=mortality/in_icu/first_24h do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max] tabularization.min_code_inclusion_count=10\n", + "[2024-12-14 23:39:48,800][HYDRA] \t#1 : +model_launcher.model.eta=0.02696421201974344 +model_launcher.model.lambda=0.747916008024172 +model_launcher.model.alpha=0.0027585603023633004 +model_launcher.model.subsample=0.8513376072017109 +model_launcher.model.min_child_weight=28.223985720385308 +model_launcher.model.max_depth=12 model_launcher.training_params.num_boost_round=214 model_launcher.training_params.early_stopping_rounds=3 input_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ output_model_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/ task_name=mortality/in_icu/first_24h do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max] tabularization.min_code_inclusion_count=10\n", + "\u001b[32m2024-12-14 23:39:49.331\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main 
function in __main__ module. Using sys.argv[0] as a fallback.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:49.331\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main function in __main__ module. Using sys.argv[0] as a fallback.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:49.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-model with the following configuration:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: 
${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: null\n", + "data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.02696421201974344\n", + " lambda: 0.747916008024172\n", + " alpha: 0.0027585603023633004\n", + " subsample: 0.8513376072017109\n", + " min_child_weight: 28.223985720385308\n", + " max_depth: 12\n", + " training_params:\n", + " num_boost_round: 214\n", + " early_stopping_rounds: 3\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:49.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-model with the following configuration:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", "do_overwrite: false\n", "seed: 1\n", "tqdm: false\n", @@ -2179,62 +2412,1226 @@ " nthread: 1\n", " tree_method: hist\n", " objective: binary:logistic\n", - " eta: 0.3174642123234027\n", - " lambda: 0.1822517129021365\n", - " alpha: 0.0034557035933266635\n", - " subsample: 0.6267376438851018\n", - " min_child_weight: 19.197298182330886\n", + " eta: 0.01142217933288686\n", + " lambda: 0.003637888220850399\n", + " alpha: 0.4118676342582917\n", + " subsample: 0.8132771225539538\n", + " min_child_weight: 42.3261225166682\n", " max_depth: 13\n", " training_params:\n", - " num_boost_round: 709\n", + " num_boost_round: 564\n", + " early_stopping_rounds: 1\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:49.338\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " 
min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: null\n", + "data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.02696421201974344\n", + " lambda: 0.747916008024172\n", + " alpha: 0.0027585603023633004\n", + " subsample: 0.8513376072017109\n", + " min_child_weight: 28.223985720385308\n", + " max_depth: 12\n", + " training_params:\n", + " num_boost_round: 214\n", " early_stopping_rounds: 3\n", "task_name: mortality/in_icu/first_24h\n", 
"input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", - "output_model_dir: /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", "delete_below_top_k: -1\n", "name: launch_model\n", "\n", "Paths: (checkbox indicates if it exists)\n", - " - input_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", - " - input_label_cache_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels\n", - " - output_dir: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", - " - tabularization.filtered_code_metadata_fp: ✅ /Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", - "Error executing job with overrides: ['+model_launcher.model.eta=0.3778393076431474', '+model_launcher.model.lambda=0.02025881019876421', '+model_launcher.model.alpha=0.0016680628084537149', '+model_launcher.model.subsample=0.6402928897499296', '+model_launcher.model.min_child_weight=72.18555499106307', '+model_launcher.model.max_depth=13', 'model_launcher.training_params.num_boost_round=258', 'model_launcher.training_params.early_stopping_rounds=5', 'input_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data', 'output_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/', 'output_model_dir=/Users/sim/Documents/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/', 'task_name=mortality/in_icu/first_24h', 'do_overwrite=False', 'tabularization.window_sizes=[1d,30d,365d]', 
'tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max]', 'tabularization.min_code_inclusion_count=10']\n", - "Traceback (most recent call last):\n", - " File \"/Users/sim/miniconda3/envs/dev/bin/meds-tab-model\", line 8, in \n", - " sys.exit(main())\n", - " ^^^^^^\n", - " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra/main.py\", line 94, in decorated_main\n", - " _run_hydra(\n", - " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra/_internal/utils.py\", line 394, in _run_hydra\n", - " _run_app(\n", - " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra/_internal/utils.py\", line 465, in _run_app\n", - " run_and_report(\n", - " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra/_internal/utils.py\", line 223, in run_and_report\n", - " raise ex\n", - " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra/_internal/utils.py\", line 220, in run_and_report\n", - " return func()\n", - " ^^^^^^\n", - " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra/_internal/utils.py\", line 466, in \n", - " lambda: hydra.multirun(\n", - " ^^^^^^^^^^^^^^^\n", - " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra/_internal/hydra.py\", line 162, in multirun\n", - " ret = sweeper.sweep(arguments=task_overrides)\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra_plugins/hydra_optuna_sweeper/optuna_sweeper.py\", line 52, in sweep\n", - " return self.sweeper.sweep(arguments)\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra_plugins/hydra_optuna_sweeper/_impl.py\", line 391, in sweep\n", - " raise e\n", - " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra_plugins/hydra_optuna_sweeper/_impl.py\", line 357, in sweep\n", - 
" values = [float(ret.return_value)]\n", - " ^^^^^^^^^^^^^^^^\n", - " File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/hydra/core/utils.py\", line 260, in return_value\n", - " raise self._return_value\n", - "hydra.errors.InstantiationException: Error locating target 'MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize', set env var HYDRA_FULL_ERROR=1 to see chained exception.\n", - "full_key: model_launcher\n" + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - input_label_cache_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", + " - tabularization.filtered_code_metadata_fp: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:49.338\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - 
value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: null\n", + "data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.01142217933288686\n", + " lambda: 0.003637888220850399\n", + " alpha: 0.4118676342582917\n", + " subsample: 0.8132771225539538\n", + " min_child_weight: 42.3261225166682\n", + " max_depth: 13\n", + " training_params:\n", + " num_boost_round: 564\n", + " early_stopping_rounds: 1\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + 
"time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - input_label_cache_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", + " - tabularization.filtered_code_metadata_fp: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if not df.columns == [\"code\", \"count\"]:\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. 
Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if not df.columns == [\"code\", \"count\"]:\n", + "\u001b[32m2024-12-14 23:39:51.354\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.launch_model\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m70\u001b[0m - \u001b[34m\u001b[1mModel config and performance logged to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/5319960564723494202/config.log and /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/5319960564723494202/performance.log\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:51.356\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.launch_model\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m70\u001b[0m - \u001b[34m\u001b[1mModel config and performance logged to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/2674362470617915327/config.log and /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/2674362470617915327/performance.log\u001b[0m\n", + "[2024-12-14 23:39:51,521][HYDRA] Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 2 jobs\n", + "[2024-12-14 23:39:51,521][HYDRA] Launching jobs, sweep output dir : /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/hydra\n", + "[2024-12-14 23:39:51,521][HYDRA] \t#0 : +model_launcher.model.eta=0.008033814121534137 +model_launcher.model.lambda=0.0029391213721733406 +model_launcher.model.alpha=0.031965211281732187 
+model_launcher.model.subsample=0.7011144902687296 +model_launcher.model.min_child_weight=79.85782023563286 +model_launcher.model.max_depth=9 model_launcher.training_params.num_boost_round=539 model_launcher.training_params.early_stopping_rounds=1 input_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ output_model_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/ task_name=mortality/in_icu/first_24h do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max] tabularization.min_code_inclusion_count=10\n", + "[2024-12-14 23:39:51,521][HYDRA] \t#1 : +model_launcher.model.eta=0.4103984719456223 +model_launcher.model.lambda=0.004166917970248561 +model_launcher.model.alpha=0.001902636489351413 +model_launcher.model.subsample=0.9769901151226084 +model_launcher.model.min_child_weight=78.45619026345103 +model_launcher.model.max_depth=12 model_launcher.training_params.num_boost_round=311 model_launcher.training_params.early_stopping_rounds=1 input_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ output_model_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/ task_name=mortality/in_icu/first_24h do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max] tabularization.min_code_inclusion_count=10\n", + "\u001b[32m2024-12-14 23:39:52.059\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main 
function in __main__ module. Using sys.argv[0] as a fallback.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:52.062\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-model with the following configuration:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", 
+ " normalizer: null\n", + "data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.008033814121534137\n", + " lambda: 0.0029391213721733406\n", + " alpha: 0.031965211281732187\n", + " subsample: 0.7011144902687296\n", + " min_child_weight: 79.85782023563286\n", + " max_depth: 9\n", + " training_params:\n", + " num_boost_round: 539\n", + " early_stopping_rounds: 1\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:52.063\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main function in __main__ module. 
Using sys.argv[0] as a fallback.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:52.067\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: null\n", + 
"data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.008033814121534137\n", + " lambda: 0.0029391213721733406\n", + " alpha: 0.031965211281732187\n", + " subsample: 0.7011144902687296\n", + " min_child_weight: 79.85782023563286\n", + " max_depth: 9\n", + " training_params:\n", + " num_boost_round: 539\n", + " early_stopping_rounds: 1\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - input_label_cache_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", + " - tabularization.filtered_code_metadata_fp: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:52.067\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-model 
with the following configuration:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: null\n", + "data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " 
tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.4103984719456223\n", + " lambda: 0.004166917970248561\n", + " alpha: 0.001902636489351413\n", + " subsample: 0.9769901151226084\n", + " min_child_weight: 78.45619026345103\n", + " max_depth: 12\n", + " training_params:\n", + " num_boost_round: 311\n", + " early_stopping_rounds: 1\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:52.071\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - 
static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: null\n", + "data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.4103984719456223\n", + " lambda: 0.004166917970248561\n", + " alpha: 0.001902636489351413\n", + " subsample: 0.9769901151226084\n", + " min_child_weight: 78.45619026345103\n", + " max_depth: 12\n", + " training_params:\n", + " num_boost_round: 311\n", + " early_stopping_rounds: 1\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - input_label_cache_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", + " - tabularization.filtered_code_metadata_fp: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if not df.columns == [\"code\", \"count\"]:\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. 
Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if not df.columns == [\"code\", \"count\"]:\n", + "\u001b[32m2024-12-14 23:39:53.983\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.launch_model\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m70\u001b[0m - \u001b[34m\u001b[1mModel config and performance logged to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/1385397265463664318/config.log and /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/1385397265463664318/performance.log\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:53.990\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.launch_model\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m70\u001b[0m - \u001b[34m\u001b[1mModel config and performance logged to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/3660540200391396743/config.log and /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/3660540200391396743/performance.log\u001b[0m\n", + "[2024-12-14 23:39:54,062][HYDRA] Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 2 jobs\n", + "[2024-12-14 23:39:54,062][HYDRA] Launching jobs, sweep output dir : /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/hydra\n", + "[2024-12-14 23:39:54,062][HYDRA] \t#0 : +model_launcher.model.eta=0.008499690637911965 +model_launcher.model.lambda=0.4122092470309962 +model_launcher.model.alpha=0.05837472607350609 
+model_launcher.model.subsample=0.540282586557892 +model_launcher.model.min_child_weight=65.46896393357495 +model_launcher.model.max_depth=12 model_launcher.training_params.num_boost_round=708 model_launcher.training_params.early_stopping_rounds=7 input_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ output_model_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/ task_name=mortality/in_icu/first_24h do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max] tabularization.min_code_inclusion_count=10\n", + "[2024-12-14 23:39:54,062][HYDRA] \t#1 : +model_launcher.model.eta=0.0018901897752212065 +model_launcher.model.lambda=0.15915705152525939 +model_launcher.model.alpha=0.24416448139530966 +model_launcher.model.subsample=0.7731602385301684 +model_launcher.model.min_child_weight=1.4870822825511332 +model_launcher.model.max_depth=9 model_launcher.training_params.num_boost_round=581 model_launcher.training_params.early_stopping_rounds=5 input_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ output_model_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/ task_name=mortality/in_icu/first_24h do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max] tabularization.min_code_inclusion_count=10\n", + "\u001b[32m2024-12-14 23:39:54.710\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main 
function in __main__ module. Using sys.argv[0] as a fallback.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:54.713\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-model with the following configuration:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", 
+ " normalizer: null\n", + "data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.008499690637911965\n", + " lambda: 0.4122092470309962\n", + " alpha: 0.05837472607350609\n", + " subsample: 0.540282586557892\n", + " min_child_weight: 65.46896393357495\n", + " max_depth: 12\n", + " training_params:\n", + " num_boost_round: 708\n", + " early_stopping_rounds: 7\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:54.718\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " 
min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: null\n", + "data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.008499690637911965\n", + " lambda: 0.4122092470309962\n", + " alpha: 0.05837472607350609\n", + " subsample: 0.540282586557892\n", + " min_child_weight: 65.46896393357495\n", + " max_depth: 12\n", + " training_params:\n", + " num_boost_round: 708\n", + " 
early_stopping_rounds: 7\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - input_label_cache_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", + " - tabularization.filtered_code_metadata_fp: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:54.791\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main function in __main__ module. 
Using sys.argv[0] as a fallback.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:54.795\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-model with the following configuration:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: null\n", + 
"data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.0018901897752212065\n", + " lambda: 0.15915705152525939\n", + " alpha: 0.24416448139530966\n", + " subsample: 0.7731602385301684\n", + " min_child_weight: 1.4870822825511332\n", + " max_depth: 9\n", + " training_params:\n", + " num_boost_round: 581\n", + " early_stopping_rounds: 5\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:54.799\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 
10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: null\n", + "data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.0018901897752212065\n", + " lambda: 0.15915705152525939\n", + " alpha: 0.24416448139530966\n", + " subsample: 0.7731602385301684\n", + " min_child_weight: 1.4870822825511332\n", + " max_depth: 9\n", + " training_params:\n", + " num_boost_round: 581\n", + " early_stopping_rounds: 5\n", + 
"task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - input_label_cache_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", + " - tabularization.filtered_code_metadata_fp: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if not df.columns == [\"code\", \"count\"]:\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. 
Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if not df.columns == [\"code\", \"count\"]:\n", + "\u001b[32m2024-12-14 23:39:56.643\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.launch_model\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m70\u001b[0m - \u001b[34m\u001b[1mModel config and performance logged to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/5205035581204184743/config.log and /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/5205035581204184743/performance.log\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:57.637\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.launch_model\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m70\u001b[0m - \u001b[34m\u001b[1mModel config and performance logged to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/2382245570059491332/config.log and /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/2382245570059491332/performance.log\u001b[0m\n", + "[2024-12-14 23:39:57,698][HYDRA] Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 2 jobs\n", + "[2024-12-14 23:39:57,698][HYDRA] Launching jobs, sweep output dir : /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/hydra\n", + "[2024-12-14 23:39:57,699][HYDRA] \t#0 : +model_launcher.model.eta=0.005517080675975136 +model_launcher.model.lambda=0.7999484495260932 +model_launcher.model.alpha=0.09897937060934658 
+model_launcher.model.subsample=0.8512332295533758 +model_launcher.model.min_child_weight=61.520845937024276 +model_launcher.model.max_depth=5 model_launcher.training_params.num_boost_round=902 model_launcher.training_params.early_stopping_rounds=1 input_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ output_model_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/ task_name=mortality/in_icu/first_24h do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max] tabularization.min_code_inclusion_count=10\n", + "[2024-12-14 23:39:57,699][HYDRA] \t#1 : +model_launcher.model.eta=0.8748681430152169 +model_launcher.model.lambda=0.8405205393709447 +model_launcher.model.alpha=0.05094278875531456 +model_launcher.model.subsample=0.7458147461040969 +model_launcher.model.min_child_weight=17.379185076142814 +model_launcher.model.max_depth=3 model_launcher.training_params.num_boost_round=951 model_launcher.training_params.early_stopping_rounds=8 input_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data output_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/ output_model_dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/ task_name=mortality/in_icu/first_24h do_overwrite=False tabularization.window_sizes=[1d,30d,365d] tabularization.aggs=[static/present,code/count,value/count,value/sum,value/sum_sqd,value/min,value/max] tabularization.min_code_inclusion_count=10\n", + "\u001b[32m2024-12-14 23:39:58.267\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main 
function in __main__ module. Using sys.argv[0] as a fallback.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:58.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-model with the following configuration:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", 
+ " normalizer: null\n", + "data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.8748681430152169\n", + " lambda: 0.8405205393709447\n", + " alpha: 0.05094278875531456\n", + " subsample: 0.7458147461040969\n", + " min_child_weight: 17.379185076142814\n", + " max_depth: 3\n", + " training_params:\n", + " num_boost_round: 951\n", + " early_stopping_rounds: 8\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:58.275\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " 
min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: null\n", + "data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.8748681430152169\n", + " lambda: 0.8405205393709447\n", + " alpha: 0.05094278875531456\n", + " subsample: 0.7458147461040969\n", + " min_child_weight: 17.379185076142814\n", + " max_depth: 3\n", + " training_params:\n", + " num_boost_round: 951\n", + " 
early_stopping_rounds: 8\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - input_label_cache_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", + " - tabularization.filtered_code_metadata_fp: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:58.298\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mcurrent_script_name\u001b[0m:\u001b[36m433\u001b[0m - \u001b[33m\u001b[1mCan't find main function in __main__ module. 
Using sys.argv[0] as a fallback.\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:58.302\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m446\u001b[0m - \u001b[1mRunning meds-tab-model with the following configuration:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: null\n", + 
"data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.005517080675975136\n", + " lambda: 0.7999484495260932\n", + " alpha: 0.09897937060934658\n", + " subsample: 0.8512332295533758\n", + " min_child_weight: 61.520845937024276\n", + " max_depth: 5\n", + " training_params:\n", + " num_boost_round: 902\n", + " early_stopping_rounds: 1\n", + "task_name: mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\u001b[0m\n", + "\u001b[32m2024-12-14 23:39:58.306\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.utils\u001b[0m:\u001b[36mstage_init\u001b[0m:\u001b[36m467\u001b[0m - \u001b[34m\u001b[1mStage config:\n", + "input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds//data\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tabularized/\n", + "do_overwrite: false\n", + "seed: 1\n", + "tqdm: false\n", + "worker: 0\n", + "loguru_init: false\n", + "log_dir: ${output_dir}/.logs/\n", + "cache_dir: ${output_dir}/.cache\n", + "tabularization:\n", + " filtered_code_metadata_fp: ${output_dir}/metadata/codes.parquet\n", + " allowed_codes: null\n", + " min_code_inclusion_count: 
10\n", + " min_code_inclusion_frequency: null\n", + " min_correlation: null\n", + " max_by_correlation: null\n", + " max_included_codes: null\n", + " window_sizes:\n", + " - 1d\n", + " - 30d\n", + " - 365d\n", + " aggs:\n", + " - static/present\n", + " - code/count\n", + " - value/count\n", + " - value/sum\n", + " - value/sum_sqd\n", + " - value/min\n", + " - value/max\n", + " _resolved_codes: ${filter_to_codes:${tabularization.filtered_code_metadata_fp},${tabularization.allowed_codes},${tabularization.min_code_inclusion_count},${tabularization.min_code_inclusion_frequency},${tabularization.max_included_codes}}\n", + "path:\n", + " input_tabularized_cache_dir: ${input_tabularized_cache_dir}\n", + " input_label_cache_dir: ${input_label_cache_dir}\n", + " model_file_stem: xgboost\n", + " model_file_extension: .json\n", + " cache_dir: ${cache_dir}\n", + " sweep_results_dir: ${time_output_model_dir}/sweep_results/\n", + " best_trial_dir: ${time_output_model_dir}/best_trial/\n", + " performance_log_stem: performance\n", + " config_log_stem: config\n", + "data_processing_params:\n", + " imputer:\n", + " imputer_target: null\n", + " normalization:\n", + " normalizer: null\n", + "data_loading_params:\n", + " keep_data_in_memory: true\n", + " binarize_task: true\n", + "model_launcher:\n", + " path: ${path}\n", + " data_processing_params: ${data_processing_params}\n", + " data_loading_params: ${data_loading_params}\n", + " tabularization: ${tabularization}\n", + " _target_: MEDS_tabular_automl.xgboost_model.XGBoostModel.initialize\n", + " model:\n", + " booster: gbtree\n", + " device: cpu\n", + " nthread: 1\n", + " tree_method: hist\n", + " objective: binary:logistic\n", + " eta: 0.005517080675975136\n", + " lambda: 0.7999484495260932\n", + " alpha: 0.09897937060934658\n", + " subsample: 0.8512332295533758\n", + " min_child_weight: 61.520845937024276\n", + " max_depth: 5\n", + " training_params:\n", + " num_boost_round: 902\n", + " early_stopping_rounds: 1\n", + "task_name: 
mortality/in_icu/first_24h\n", + "input_tabularized_cache_dir: ${output_dir}/${task_name}/task_cache\n", + "input_label_cache_dir: ${output_dir}/${task_name}/labels\n", + "output_model_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/meds_tab//mortality/in_icu/first_24h/\n", + "time_output_model_dir: ${output_model_dir}/${now:%Y-%m-%d_%H-%M-%S}\n", + "delete_below_top_k: -1\n", + "name: launch_model\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - input_label_cache_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/mortality/in_icu/first_24h/labels\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized\n", + " - tabularization.filtered_code_metadata_fp: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/tabularized/metadata/codes.parquet\u001b[0m\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if not df.columns == [\"code\", \"count\"]:\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_tabular_automl/describe_codes.py:63: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. 
Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if not df.columns == [\"code\", \"count\"]:\n", + "\u001b[32m2024-12-14 23:40:00.242\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.launch_model\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m70\u001b[0m - \u001b[34m\u001b[1mModel config and performance logged to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/5017439753981061336/config.log and /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/5017439753981061336/performance.log\u001b[0m\n", + "\u001b[32m2024-12-14 23:40:00.242\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mMEDS_tabular_automl.scripts.launch_model\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m70\u001b[0m - \u001b[34m\u001b[1mModel config and performance logged to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/8884292553190218621/config.log and /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/output/meds_tab/mortality/in_icu/first_24h/2024-12-14_23-39-43/sweep_results/8884292553190218621/performance.log\u001b[0m\n", + "[2024-12-14 23:40:00,487][HYDRA] Best parameters: {'+model_launcher.model.eta': 0.0018901897752212065, '+model_launcher.model.lambda': 0.15915705152525939, '+model_launcher.model.alpha': 0.24416448139530966, '+model_launcher.model.subsample': 0.7731602385301684, '+model_launcher.model.min_child_weight': 1.4870822825511332, '+model_launcher.model.max_depth': 9, 'model_launcher.training_params.num_boost_round': 581, 'model_launcher.training_params.early_stopping_rounds': 5}\n", + "[2024-12-14 23:40:00,487][HYDRA] Best value: 0.675\n", + "\u001b[32m2024-12-14 23:40:00.523\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mMEDS_tabular_automl.evaluation_callback\u001b[0m:\u001b[36mon_multirun_end\u001b[0m:\u001b[36m21\u001b[0m - \u001b[1m\n", + "Performance of the top 10 models:\n", + "shape: (10, 3)\n", + "┌─────────────────────┬────────────┬──────────┐\n", + "│ trial_name ┆ tuning_auc ┆ test_auc │\n", + "│ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ f64 ┆ f64 │\n", + "╞═════════════════════╪════════════╪══════════╡\n", + "│ 2382245570059491332 ┆ 0.675 ┆ 1.0 │\n", + "│ 1385397265463664318 ┆ 0.5 ┆ 0.5 │\n", + "│ 2674362470617915327 ┆ 0.5 ┆ 0.5 │\n", + "│ 3660540200391396743 ┆ 0.5 ┆ 0.5 │\n", + "│ 4399271614519315579 ┆ 0.5 ┆ 0.5 │\n", + "│ 5017439753981061336 ┆ 0.5 ┆ 0.5 │\n", + "│ 5205035581204184743 ┆ 0.5 ┆ 0.5 │\n", + "│ 5319960564723494202 ┆ 0.5 ┆ 0.5 │\n", + "│ 7990023231025888822 ┆ 0.5 ┆ 0.5 │\n", + "│ 8884292553190218621 ┆ 0.5 ┆ 0.5 │\n", + "└─────────────────────┴────────────┴──────────┘\u001b[0m\n", + "\u001b[32m2024-12-14 23:40:00.524\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.evaluation_callback\u001b[0m:\u001b[36mlog_performance\u001b[0m:\u001b[36m50\u001b[0m - \u001b[1m\n", + "Best model can be found at 2382245570059491332\n", + "Performance of best model:\n", + "Tuning AUC: 0.675\n", + "Test AUC: 1.0\u001b[0m\n", + "\u001b[32m2024-12-14 23:40:00.524\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_tabular_automl.evaluation_callback\u001b[0m:\u001b[36mon_multirun_end\u001b[0m:\u001b[36m29\u001b[0m - \u001b[1mAll models were saved. 
To automatically delete models, set delete_below_top_k in config.\u001b[0m\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/hydra/_internal/callbacks.py:28: UserWarning: Callback EvaluationCallback.on_multirun_end raised TypeError: unsupported operand type(s) for /: 'str' and 'str'\n", + " warnings.warn(\n" ] } ], @@ -2248,7 +3645,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "dev", + "display_name": "meds_demo", "language": "python", "name": "python3" }, From b35c0ecb1e0a2cc1b9b2b6cbbc250a037bcc90c1 Mon Sep 17 00:00:00 2001 From: Nassim Date: Sun, 15 Dec 2024 03:37:20 -0500 Subject: [PATCH 7/8] added meds-torch demo --- .gitignore | 3 +- demo/meds_torch.ipynb | 2950 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 2952 insertions(+), 1 deletion(-) create mode 100644 demo/meds_torch.ipynb diff --git a/.gitignore b/.gitignore index 3a41f4d..87ef41d 100644 --- a/.gitignore +++ b/.gitignore @@ -362,4 +362,5 @@ meds_env/* src/MEDS_DEV/demo/download/* src/MEDS_DEV/demo/content/* -demo/work_dir \ No newline at end of file +demo/work_dir +demo/outputs \ No newline at end of file diff --git a/demo/meds_torch.ipynb b/demo/meds_torch.ipynb new file mode 100644 index 0000000..da5cf19 --- /dev/null +++ b/demo/meds_torch.ipynb @@ -0,0 +1,2950 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "background_save": true, + "base_uri": "https://localhost:8080/" + }, + "id": "XWB7O1UGhRIo", + "outputId": "ff12dccf-ba74-4352-9e67-3f045e6af754" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting meds-torch\n", + " Downloading meds_torch-0.0.5-py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: polars in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-torch) (1.17.1)\n", + "Requirement already satisfied: pyarrow in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages 
(from meds-torch) (17.0.0)\n", + "Requirement already satisfied: nested_ragged_tensors>=0.1.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-torch) (0.1)\n", + "Requirement already satisfied: loguru in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-torch) (0.7.3)\n", + "Requirement already satisfied: numpy in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-torch) (2.2.0)\n", + "Requirement already satisfied: ml-mixins in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-torch) (0.1.0)\n", + "Collecting torch (from meds-torch)\n", + " Using cached torch-2.5.1-cp312-cp312-manylinux1_x86_64.whl.metadata (28 kB)\n", + "Requirement already satisfied: omegaconf in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-torch) (2.3.0)\n", + "Requirement already satisfied: hydra-core in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-torch) (1.3.2)\n", + "Collecting lightning (from meds-torch)\n", + " Using cached lightning-2.4.0-py3-none-any.whl.metadata (38 kB)\n", + "Collecting x-transformers==1.34.0 (from meds-torch)\n", + " Using cached x_transformers-1.34.0-py3-none-any.whl.metadata (661 bytes)\n", + "Collecting rich (from meds-torch)\n", + " Using cached rich-13.9.4-py3-none-any.whl.metadata (18 kB)\n", + "Collecting hydra-colorlog (from meds-torch)\n", + " Using cached hydra_colorlog-1.2.0-py3-none-any.whl.metadata (949 bytes)\n", + "Requirement already satisfied: hydra-optuna-sweeper in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from meds-torch) (1.2.0)\n", + "Collecting wandb (from meds-torch)\n", + " Downloading wandb-0.19.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)\n", + "Collecting torchmetrics (from meds-torch)\n", + " Using cached torchmetrics-1.6.0-py3-none-any.whl.metadata (20 kB)\n", + "Collecting 
torchvision (from meds-torch)\n", + " Using cached torchvision-0.20.1-cp312-cp312-manylinux1_x86_64.whl.metadata (6.1 kB)\n", + "Collecting transformers (from meds-torch)\n", + " Downloading transformers-4.47.0-py3-none-any.whl.metadata (43 kB)\n", + "Collecting ray[tune] (from meds-torch)\n", + " Downloading ray-2.40.0-cp312-cp312-manylinux2014_x86_64.whl.metadata (17 kB)\n", + "Collecting einops>=0.8.0 (from x-transformers==1.34.0->meds-torch)\n", + " Using cached einops-0.8.0-py3-none-any.whl.metadata (12 kB)\n", + "Requirement already satisfied: safetensors in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from nested_ragged_tensors>=0.1.0->meds-torch) (0.4.5)\n", + "Collecting filelock (from torch->meds-torch)\n", + " Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB)\n", + "Requirement already satisfied: typing-extensions>=4.8.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from torch->meds-torch) (4.12.2)\n", + "Requirement already satisfied: networkx in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from torch->meds-torch) (3.3)\n", + "Collecting jinja2 (from torch->meds-torch)\n", + " Using cached jinja2-3.1.4-py3-none-any.whl.metadata (2.6 kB)\n", + "Collecting fsspec (from torch->meds-torch)\n", + " Using cached fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB)\n", + "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->meds-torch)\n", + " Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->meds-torch)\n", + " Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->meds-torch)\n", + " Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cudnn-cu12==9.1.0.70 (from 
torch->meds-torch)\n", + " Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->meds-torch)\n", + " Using cached nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->meds-torch)\n", + " Using cached nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-curand-cu12==10.3.5.147 (from torch->meds-torch)\n", + " Using cached nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch->meds-torch)\n", + " Using cached nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch->meds-torch)\n", + " Using cached nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-nccl-cu12==2.21.5 (from torch->meds-torch)\n", + " Using cached nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl.metadata (1.8 kB)\n", + "Collecting nvidia-nvtx-cu12==12.4.127 (from torch->meds-torch)\n", + " Using cached nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.7 kB)\n", + "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch->meds-torch)\n", + " Using cached nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting triton==3.1.0 (from torch->meds-torch)\n", + " Using cached triton-3.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)\n", + "Requirement already satisfied: setuptools in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from torch->meds-torch) (75.1.0)\n", + "Collecting sympy==1.13.1 (from torch->meds-torch)\n", + " Using cached sympy-1.13.1-py3-none-any.whl.metadata (12 kB)\n", + "Collecting mpmath<1.4,>=1.1.0 
(from sympy==1.13.1->torch->meds-torch)\n", + " Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)\n", + "Requirement already satisfied: colorlog in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-colorlog->meds-torch) (6.9.0)\n", + "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-core->meds-torch) (4.9.3)\n", + "Requirement already satisfied: packaging in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-core->meds-torch) (24.2)\n", + "Requirement already satisfied: PyYAML>=5.1.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from omegaconf->meds-torch) (6.0.2)\n", + "Requirement already satisfied: optuna<3.0.0,>=2.10.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-optuna-sweeper->meds-torch) (2.10.1)\n", + "Collecting lightning-utilities<2.0,>=0.10.0 (from lightning->meds-torch)\n", + " Using cached lightning_utilities-0.11.9-py3-none-any.whl.metadata (5.2 kB)\n", + "Requirement already satisfied: tqdm<6.0,>=4.57.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from lightning->meds-torch) (4.67.1)\n", + "Collecting pytorch-lightning (from lightning->meds-torch)\n", + " Using cached pytorch_lightning-2.4.0-py3-none-any.whl.metadata (21 kB)\n", + "Collecting click>=7.0 (from ray[tune]->meds-torch)\n", + " Using cached click-8.1.7-py3-none-any.whl.metadata (3.0 kB)\n", + "Requirement already satisfied: jsonschema in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from ray[tune]->meds-torch) (4.23.0)\n", + "Collecting msgpack<2.0.0,>=1.0.0 (from ray[tune]->meds-torch)\n", + " Using cached msgpack-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)\n", + "Collecting protobuf!=3.19.5,>=3.15.3 (from ray[tune]->meds-torch)\n", + " Downloading 
protobuf-5.29.1-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)\n", + "Collecting aiosignal (from ray[tune]->meds-torch)\n", + " Downloading aiosignal-1.3.2-py2.py3-none-any.whl.metadata (3.8 kB)\n", + "Collecting frozenlist (from ray[tune]->meds-torch)\n", + " Using cached frozenlist-1.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)\n", + "Collecting requests (from ray[tune]->meds-torch)\n", + " Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)\n", + "Requirement already satisfied: pandas in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from ray[tune]->meds-torch) (2.2.3)\n", + "Collecting tensorboardX>=1.9 (from ray[tune]->meds-torch)\n", + " Using cached tensorboardX-2.6.2.2-py2.py3-none-any.whl.metadata (5.8 kB)\n", + "Collecting markdown-it-py>=2.2.0 (from rich->meds-torch)\n", + " Using cached markdown_it_py-3.0.0-py3-none-any.whl.metadata (6.9 kB)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from rich->meds-torch) (2.18.0)\n", + "Collecting pillow!=8.3.*,>=5.3.0 (from torchvision->meds-torch)\n", + " Using cached pillow-11.0.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (9.1 kB)\n", + "Collecting huggingface-hub<1.0,>=0.24.0 (from transformers->meds-torch)\n", + " Downloading huggingface_hub-0.26.5-py3-none-any.whl.metadata (13 kB)\n", + "Collecting regex!=2019.12.17 (from transformers->meds-torch)\n", + " Using cached regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)\n", + "Collecting tokenizers<0.22,>=0.21 (from transformers->meds-torch)\n", + " Downloading tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n", + "Collecting docker-pycreds>=0.4.0 (from wandb->meds-torch)\n", + " Using cached docker_pycreds-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)\n", + 
"Collecting gitpython!=3.1.29,>=1.0.0 (from wandb->meds-torch)\n", + " Using cached GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)\n", + "Requirement already satisfied: platformdirs in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from wandb->meds-torch) (4.3.6)\n", + "Requirement already satisfied: psutil>=5.0.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from wandb->meds-torch) (6.1.0)\n", + "Collecting pydantic<3,>=2.6 (from wandb->meds-torch)\n", + " Downloading pydantic-2.10.3-py3-none-any.whl.metadata (172 kB)\n", + "Collecting sentry-sdk>=2.0.0 (from wandb->meds-torch)\n", + " Downloading sentry_sdk-2.19.2-py2.py3-none-any.whl.metadata (9.9 kB)\n", + "Collecting setproctitle (from wandb->meds-torch)\n", + " Using cached setproctitle-1.3.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)\n", + "Requirement already satisfied: six>=1.4.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from docker-pycreds>=0.4.0->wandb->meds-torch) (1.17.0)\n", + "Collecting aiohttp!=4.0.0a0,!=4.0.0a1 (from fsspec[http]<2026.0,>=2022.5.0->lightning->meds-torch)\n", + " Downloading aiohttp-3.11.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)\n", + "Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.29,>=1.0.0->wandb->meds-torch)\n", + " Using cached gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)\n", + "Collecting mdurl~=0.1 (from markdown-it-py>=2.2.0->rich->meds-torch)\n", + " Using cached mdurl-0.1.2-py3-none-any.whl.metadata (1.6 kB)\n", + "Requirement already satisfied: alembic in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-torch) (1.14.0)\n", + "Requirement already satisfied: cliff in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from 
optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-torch) (4.8.0)\n", + "Requirement already satisfied: cmaes>=0.8.2 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-torch) (0.11.1)\n", + "Requirement already satisfied: scipy!=1.4.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-torch) (1.13.1)\n", + "Requirement already satisfied: sqlalchemy>=1.1.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-torch) (2.0.36)\n", + "Collecting annotated-types>=0.6.0 (from pydantic<3,>=2.6->wandb->meds-torch)\n", + " Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n", + "Collecting pydantic-core==2.27.1 (from pydantic<3,>=2.6->wandb->meds-torch)\n", + " Downloading pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", + "Collecting charset-normalizer<4,>=2 (from requests->ray[tune]->meds-torch)\n", + " Using cached charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (34 kB)\n", + "Collecting idna<4,>=2.5 (from requests->ray[tune]->meds-torch)\n", + " Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)\n", + "Collecting urllib3<3,>=1.21.1 (from requests->ray[tune]->meds-torch)\n", + " Using cached urllib3-2.2.3-py3-none-any.whl.metadata (6.5 kB)\n", + "Collecting certifi>=2017.4.17 (from requests->ray[tune]->meds-torch)\n", + " Downloading certifi-2024.12.14-py3-none-any.whl.metadata (2.3 kB)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from jinja2->torch->meds-torch) (3.0.2)\n", + "Requirement already satisfied: attrs>=22.2.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from jsonschema->ray[tune]->meds-torch) 
(24.2.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from jsonschema->ray[tune]->meds-torch) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from jsonschema->ray[tune]->meds-torch) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from jsonschema->ray[tune]->meds-torch) (0.22.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from pandas->ray[tune]->meds-torch) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from pandas->ray[tune]->meds-torch) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from pandas->ray[tune]->meds-torch) (2024.2)\n", + "Collecting aiohappyeyeballs>=2.3.0 (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning->meds-torch)\n", + " Downloading aiohappyeyeballs-2.4.4-py3-none-any.whl.metadata (6.1 kB)\n", + "Collecting multidict<7.0,>=4.5 (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning->meds-torch)\n", + " Using cached multidict-6.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.0 kB)\n", + "Collecting propcache>=0.2.0 (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning->meds-torch)\n", + " Downloading propcache-0.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.2 kB)\n", + "Collecting yarl<2.0,>=1.17.0 (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning->meds-torch)\n", + " Downloading 
yarl-1.18.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (69 kB)\n", + "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb->meds-torch)\n", + " Using cached smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from sqlalchemy>=1.1.0->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-torch) (3.1.1)\n", + "Requirement already satisfied: Mako in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from alembic->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-torch) (1.3.8)\n", + "Requirement already satisfied: PrettyTable>=0.7.2 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-torch) (3.12.0)\n", + "Requirement already satisfied: autopage>=0.4.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-torch) (0.5.2)\n", + "Requirement already satisfied: cmd2>=1.0.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-torch) (2.5.7)\n", + "Requirement already satisfied: stevedore>=2.0.1 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-torch) (5.4.0)\n", + "Requirement already satisfied: pyperclip in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from cmd2>=1.0.0->cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-torch) (1.9.0)\n", + "Requirement already satisfied: wcwidth in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from cmd2>=1.0.0->cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-torch) (0.2.13)\n", + "Requirement already satisfied: pbr>=2.0.0 in 
/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from stevedore>=2.0.1->cliff->optuna<3.0.0,>=2.10.0->hydra-optuna-sweeper->meds-torch) (6.1.0)\n", + "Downloading meds_torch-0.0.5-py3-none-any.whl (155 kB)\n", + "Using cached x_transformers-1.34.0-py3-none-any.whl (41 kB)\n", + "Using cached torch-2.5.1-cp312-cp312-manylinux1_x86_64.whl (906.4 MB)\n", + "Using cached nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", + "Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n", + "Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n", + "Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n", + "Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", + "Using cached nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", + "Using cached nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", + "Using cached nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", + "Using cached nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", + "Using cached nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl (188.7 MB)\n", + "Using cached nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", + "Using cached nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (99 kB)\n", + "Using cached sympy-1.13.1-py3-none-any.whl (6.2 MB)\n", + "Using cached triton-3.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (209.6 MB)\n", + "Using cached hydra_colorlog-1.2.0-py3-none-any.whl (3.6 kB)\n", + "Using cached lightning-2.4.0-py3-none-any.whl (810 kB)\n", + "Using cached torchmetrics-1.6.0-py3-none-any.whl (926 kB)\n", + "Using cached rich-13.9.4-py3-none-any.whl (242 kB)\n", + "Using cached torchvision-0.20.1-cp312-cp312-manylinux1_x86_64.whl (7.2 MB)\n", + 
"Downloading transformers-4.47.0-py3-none-any.whl (10.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.1/10.1 MB\u001b[0m \u001b[31m40.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading wandb-0.19.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (20.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.0/20.0 MB\u001b[0m \u001b[31m121.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached click-8.1.7-py3-none-any.whl (97 kB)\n", + "Using cached docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n", + "Using cached einops-0.8.0-py3-none-any.whl (43 kB)\n", + "Using cached fsspec-2024.10.0-py3-none-any.whl (179 kB)\n", + "Using cached GitPython-3.1.43-py3-none-any.whl (207 kB)\n", + "Downloading huggingface_hub-0.26.5-py3-none-any.whl (447 kB)\n", + "Using cached lightning_utilities-0.11.9-py3-none-any.whl (28 kB)\n", + "Using cached markdown_it_py-3.0.0-py3-none-any.whl (87 kB)\n", + "Using cached msgpack-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (401 kB)\n", + "Using cached pillow-11.0.0-cp312-cp312-manylinux_2_28_x86_64.whl (4.4 MB)\n", + "Downloading protobuf-5.29.1-cp38-abi3-manylinux2014_x86_64.whl (319 kB)\n", + "Downloading pydantic-2.10.3-py3-none-any.whl (456 kB)\n", + "Downloading pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m135.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (796 kB)\n", + "Using cached requests-2.32.3-py3-none-any.whl (64 kB)\n", + "Downloading sentry_sdk-2.19.2-py2.py3-none-any.whl (322 kB)\n", + "Using cached tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)\n", + "Downloading 
tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.0/3.0 MB\u001b[0m \u001b[31m163.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading aiosignal-1.3.2-py2.py3-none-any.whl (7.6 kB)\n", + "Using cached frozenlist-1.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (283 kB)\n", + "Using cached filelock-3.16.1-py3-none-any.whl (16 kB)\n", + "Using cached jinja2-3.1.4-py3-none-any.whl (133 kB)\n", + "Using cached pytorch_lightning-2.4.0-py3-none-any.whl (815 kB)\n", + "Downloading ray-2.40.0-cp312-cp312-manylinux2014_x86_64.whl (67.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.0/67.0 MB\u001b[0m \u001b[31m142.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hUsing cached setproctitle-1.3.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (31 kB)\n", + "Downloading aiohttp-3.11.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m148.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached annotated_types-0.7.0-py3-none-any.whl (13 kB)\n", + "Downloading certifi-2024.12.14-py3-none-any.whl (164 kB)\n", + "Using cached charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (143 kB)\n", + "Using cached gitdb-4.0.11-py3-none-any.whl (62 kB)\n", + "Using cached idna-3.10-py3-none-any.whl (70 kB)\n", + "Using cached mdurl-0.1.2-py3-none-any.whl (10.0 kB)\n", + "Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)\n", + "Using cached urllib3-2.2.3-py3-none-any.whl (126 kB)\n", + "Downloading aiohappyeyeballs-2.4.4-py3-none-any.whl (14 kB)\n", + "Using cached 
multidict-6.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (131 kB)\n", + "Downloading propcache-0.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (243 kB)\n", + "Using cached smmap-5.0.1-py3-none-any.whl (24 kB)\n", + "Downloading yarl-1.18.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (336 kB)\n", + "Installing collected packages: mpmath, urllib3, sympy, smmap, setproctitle, regex, pydantic-core, protobuf, propcache, pillow, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, multidict, msgpack, mdurl, lightning-utilities, jinja2, idna, fsspec, frozenlist, filelock, einops, docker-pycreds, click, charset-normalizer, certifi, annotated-types, aiohappyeyeballs, yarl, triton, tensorboardX, sentry-sdk, requests, pydantic, nvidia-cusparse-cu12, nvidia-cudnn-cu12, markdown-it-py, gitdb, aiosignal, rich, nvidia-cusolver-cu12, hydra-colorlog, huggingface-hub, gitpython, aiohttp, wandb, torch, tokenizers, ray, x-transformers, transformers, torchvision, torchmetrics, pytorch-lightning, lightning, meds-torch\n", + " Attempting uninstall: nvidia-nccl-cu12\n", + " Found existing installation: nvidia-nccl-cu12 2.23.4\n", + " Uninstalling nvidia-nccl-cu12-2.23.4:\n", + " Successfully uninstalled nvidia-nccl-cu12-2.23.4\n", + "Successfully installed aiohappyeyeballs-2.4.4 aiohttp-3.11.10 aiosignal-1.3.2 annotated-types-0.7.0 certifi-2024.12.14 charset-normalizer-3.4.0 click-8.1.7 docker-pycreds-0.4.0 einops-0.8.0 filelock-3.16.1 frozenlist-1.5.0 fsspec-2024.10.0 gitdb-4.0.11 gitpython-3.1.43 huggingface-hub-0.26.5 hydra-colorlog-1.2.0 idna-3.10 jinja2-3.1.4 lightning-2.4.0 lightning-utilities-0.11.9 markdown-it-py-3.0.0 mdurl-0.1.2 meds-torch-0.0.5 mpmath-1.3.0 msgpack-1.1.0 multidict-6.1.0 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 
nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nccl-cu12-2.21.5 nvidia-nvjitlink-cu12-12.4.127 nvidia-nvtx-cu12-12.4.127 pillow-11.0.0 propcache-0.2.1 protobuf-5.29.1 pydantic-2.10.3 pydantic-core-2.27.1 pytorch-lightning-2.4.0 ray-2.40.0 regex-2024.11.6 requests-2.32.3 rich-13.9.4 sentry-sdk-2.19.2 setproctitle-1.3.4 smmap-5.0.1 sympy-1.13.1 tensorboardX-2.6.2.2 tokenizers-0.21.0 torch-2.5.1 torchmetrics-1.6.0 torchvision-0.20.1 transformers-4.47.0 triton-3.1.0 urllib3-2.2.3 wandb-0.19.1 x-transformers-1.34.0 yarl-1.18.3\n", + "Requirement already satisfied: hydra-joblib-launcher in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (1.2.0)\n", + "Requirement already satisfied: hydra-core>=1.1.0.dev7 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-joblib-launcher) (1.3.2)\n", + "Requirement already satisfied: joblib>=0.14.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-joblib-launcher) (1.4.2)\n", + "Requirement already satisfied: omegaconf<2.4,>=2.2 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-core>=1.1.0.dev7->hydra-joblib-launcher) (2.3.0)\n", + "Requirement already satisfied: antlr4-python3-runtime==4.9.* in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-core>=1.1.0.dev7->hydra-joblib-launcher) (4.9.3)\n", + "Requirement already satisfied: packaging in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from hydra-core>=1.1.0.dev7->hydra-joblib-launcher) (24.2)\n", + "Requirement already satisfied: PyYAML>=5.1.0 in /storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages (from omegaconf<2.4,>=2.2->hydra-core>=1.1.0.dev7->hydra-joblib-launcher) (6.0.2)\n" + ] + } + ], + "source": [ + "#@title Install meds-torch\n", + 
"\n", + "!pip install meds-torch\n", + "!pip install hydra-joblib-launcher" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title Download E-ICU demo\n", + "import os\n", + "from pathlib import Path\n", + "notebook_dir = os.getcwd()\n", + "\n", + "ROOT_DIR=f\"{notebook_dir}/work_dir/mimiciv_demo/\"\n", + "# ROOT_DIR=f\"{notebook_dir}/work_dir/eicu_demo/\"\n", + "Path(ROOT_DIR).mkdir(parents=True, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "background_save": true + }, + "id": "lHbLZcqPUFdh", + "outputId": "ae8c44fc-2202-44e3-8852-ae8a2e82bb05" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cloning into '/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//tmp'...\n", + "remote: Enumerating objects: 656, done.\u001b[K\n", + "remote: Counting objects: 100% (656/656), done.\u001b[K\n", + "remote: Compressing objects: 100% (473/473), done.\u001b[K\n", + "remote: Total 656 (delta 169), reused 442 (delta 112), pack-reused 0 (from 0)\u001b[K\n", + "Receiving objects: 100% (656/656), 705.59 KiB | 7.75 MiB/s, done.\n", + "Resolving deltas: 100% (169/169), done.\n" + ] + } + ], + "source": [ + "# Download pre-meds script, event config (defining how raw data is converted to meds data), and meds-transform config\n", + "!rm -rf {ROOT_DIR}/tmp/\n", + "!mkdir {ROOT_DIR}/meds_torch_example/\n", + "!git clone --depth 1 https://github.com/oufattole/meds-torch.git {ROOT_DIR}/tmp/\n", + "!mv {ROOT_DIR}/tmp/MIMICIV_INDUCTIVE_EXPERIMENTS {ROOT_DIR}/meds_torch_example" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "background_save": true + }, + "id": "RVLBdOn1mnV5" + }, + "outputs": [], + "source": [ + "# Define the window sizes and aggregations to generate features for\n", + "MEDS_DIR = ROOT_DIR + \"/meds/\"\n", + "TASK_DIR = MEDS_DIR + \"/task_labels\"\n", + 
"TASK_NAME=\"mortality/in_icu/first_24h\"\n", + "# TASK_NAME=\"los_in_hospital_first_48h\"\n", + "OUTPUT_MODEL_DIR= ROOT_DIR + \"/output/meds_tab/\"\n", + "\n", + "TUTORIAL_DIR= ROOT_DIR + \"/meds_torch_example/MIMICIV_INDUCTIVE_EXPERIMENTS/\"\n", + "CONFIGS_DIR=f\"{TUTORIAL_DIR}/configs/\"\n", + "TENSOR_DIR= ROOT_DIR + \"/triplet_tensors/\" # Output tensors directory\n", + "N_PARALLEL_WORKERS=1 # set to the number of parallel workers you want to use\n", + "PIPELINE_CONFIG_PATH=f\"{CONFIGS_DIR}/triplet_config.yaml\" # set to the directory in which the config file is stored, must be an absolute path.\n", + "JOBLIB_RUNNER_CONFIG_PATH=f\"{CONFIGS_DIR}/joblib_runner.yaml\" # set to the directory in which the config file is stored, must be an absolute path." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "background_save": true + }, + "id": "3GuvAfGJcsVe", + "outputId": "8bc8b84d-0c0e-4c24-855a-825c12218d17" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "911" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "triplet_tensorization_config = \"\"\"\n", + "defaults:\n", + " - _preprocess\n", + " - _self_\n", + "input_dir: ${oc.env:MEDS_DIR}\n", + "cohort_dir: ${oc.env:MODEL_DIR}\n", + "stages:\n", + " - fit_filter_and_occlude\n", + " - filter_measurements\n", + " - filter_subjects\n", + " - occlude_outliers\n", + " - fit_normalization\n", + " - fit_vocabulary_indices\n", + " - normalization\n", + " - tokenization\n", + " - tensorization\n", + "\n", + "stage_configs:\n", + " filter_subjects:\n", + " min_events_per_subject: 0\n", + " min_measurements_per_subject: 0\n", + " fit_filter_and_occlude:\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - values/sum\n", + " - values/sum_sqd\n", + " - values/n_occurrences\n", + " filter_measurements:\n", + " min_subjects_per_code: 
0\n", + " fit_normalization:\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - name: values/quantiles\n", + " quantiles: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]\n", + "\"\"\"\n", + "from pathlib import Path\n", + "\n", + "Path(PIPELINE_CONFIG_PATH).write_text(triplet_tensorization_config)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "background_save": true + }, + "collapsed": true, + "id": "4F_xYVXnVts6", + "outputId": "dd924e15-c587-459d-db33-32aed94ca173" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running extraction pipeline.\n", + "\u001b[32m2024-12-15 03:05:23.970\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: fit_filter_and_occlude\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:23.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_transform-aggregate_code_metadata --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds_torch_example/MIMICIV_INDUCTIVE_EXPERIMENTS/configs --config-name=triplet_config --multirun 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=fit_filter_and_occlude worker=\"range(0,1)\" hydra/launcher=joblib\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:26.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "[2024-12-15 03:05:24,766][HYDRA] Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 1 jobs\n", + "[2024-12-15 03:05:24,766][HYDRA] Launching jobs, sweep output dir 
: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude/.logs\n", + "[2024-12-15 03:05:24,766][HYDRA] \t#0 : stage=fit_filter_and_occlude worker=0\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:26.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/joblib/externals/loky/backend/fork_exec.py:38: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "In addition, using fork() with Python in general is a recipe for mysterious\n", + "deadlocks and crashes.\n", + "\n", + "The most likely reason you are seeing this error is because you are using the\n", + "multiprocessing module on Linux, which uses fork() by default. This will be\n", + "fixed in Python 3.14. Until then, you want to use the \"spawn\" context instead.\n", + "\n", + "See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.\n", + "\n", + "If you really know what your doing, you can silence this warning with the warning module\n", + "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", + "\n", + " pid = os.fork()\n", + "2024-12-15 03:05:25.722 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:25.724 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:25.785 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. 
Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:25.788 | INFO | MEDS_transforms.utils:stage_init:73 - Running MEDS_transform-aggregate_code_metadata with the following configuration:\n", + "input_dir: ${oc.env:MEDS_DIR}\n", + "cohort_dir: ${oc.env:MODEL_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- fit_filter_and_occlude\n", + "- filter_measurements\n", + "- filter_subjects\n", + "- occlude_outliers\n", + "- fit_normalization\n", + "- fit_vocabulary_indices\n", + "- normalization\n", + "- tokenization\n", + "- tensorization\n", + "stage_configs:\n", + " reshard_to_split:\n", + " n_subjects_per_shard: 50000\n", + " filter_subjects:\n", + " min_events_per_subject: 0\n", + " min_measurements_per_subject: 0\n", + " add_time_derived_measurements:\n", + " age:\n", + " DOB_code: MEDS_BIRTH\n", + " age_code: AGE\n", + " age_unit: years\n", + " time_of_day:\n", + " time_of_day_code: TIME_OF_DAY\n", + " endpoints:\n", + " - 6\n", + " - 12\n", + " - 18\n", + " - 24\n", + " count_code_occurrences:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " do_summarize_over_all_codes: true\n", + " filter_measurements:\n", + " min_subjects_per_code: 0\n", + " min_occurrences_per_code: null\n", + " fit_outlier_detection:\n", + " aggregations:\n", + " - values/n_occurrences\n", + " - values/sum\n", + " - values/sum_sqd\n", + " occlude_outliers:\n", + " stddev_cutoff: 4.5\n", + " fit_normalization:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - name: values/quantiles\n", + " quantiles:\n", + " - 0.1\n", + " - 0.2\n", + " - 0.3\n", + " - 0.4\n", + " - 0.5\n", + " - 0.6\n", + " - 0.7\n", + " - 0.8\n", + " - 0.9\n", + " _script: 
MEDS_transform-aggregate_code_metadata\n", + " fit_vocabulary_indices:\n", + " is_metadata: true\n", + " ordering_method: lexicographic\n", + " reorder_measurements:\n", + " ordered_code_patterns: ???\n", + " fit_filter_and_occlude:\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - values/sum\n", + " - values/sum_sqd\n", + " - values/n_occurrences\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: fit_filter_and_occlude\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: ???\n", + " dataset_version: ???\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: preprocess\n", + "code_modifiers: ???\n", + "\n", + "2024-12-15 03:05:25.801 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "_script: MEDS_transform-aggregate_code_metadata\n", + "aggregations:\n", + "- code/n_occurrences\n", + "- code/n_subjects\n", + "- values/sum\n", + "- values/sum_sqd\n", + "- values/n_occurrences\n", + "is_metadata: true\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude\n", + "reducer_output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude\n", + "train_only: true\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude\n", + " - 
metadata_input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata\n", + "2024-12-15 03:05:25.814 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 1 shards\n", + "2024-12-15 03:05:25.814 | INFO | MEDS_transforms.mapreduce.mapper:map_over:644 - Processing train split only via shard prefix. Not filtering with /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/subject_splits.parquet.\n", + "2024-12-15 03:05:25.821 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude/train/0.parquet\n", + "2024-12-15 03:05:25.822 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:25.821980. Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:25.823 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\n", + "2024-12-15 03:05:25.824 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:25.825 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude/train/0.parquet\n", + "2024-12-15 03:05:25.884 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.062441\n", + "2024-12-15 03:05:25.884 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude/train/.0.parquet_cache, but clearing lock at 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude/train/.0.parquet_cache/locks/2024-12-15T03:05:25.821980.json\n", + "2024-12-15 03:05:25.884 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.083149\n", + "2024-12-15 03:05:25.885 | INFO | MEDS_transforms.aggregate_code_metadata:run_map_reduce:757 - Starting reduction process\n", + "2024-12-15 03:05:25.886 | INFO | MEDS_transforms.aggregate_code_metadata:run_map_reduce:764 - All map shards complete! Starting code metadata reduction computation.\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/aggregate_code_metadata.py:734: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if agg not in df.columns:\n", + "2024-12-15 03:05:25.897 | INFO | MEDS_transforms.aggregate_code_metadata:run_map_reduce:777 - Joining to existing code metadata at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/codes.parquet\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/aggregate_code_metadata.py:779: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. 
Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " existing = existing.drop(*[c for c in existing.columns if c in set(reduced.columns) - set(join_cols)])\n", + "2024-12-15 03:05:25.925 | INFO | MEDS_transforms.aggregate_code_metadata:run_map_reduce:783 - Finished reduction in 0:00:00.039388\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:26.270\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: filter_measurements\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:26.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_transform-filter_measurements --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds_torch_example/MIMICIV_INDUCTIVE_EXPERIMENTS/configs --config-name=triplet_config --multirun 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=filter_measurements worker=\"range(0,1)\" hydra/launcher=joblib\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:29.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "[2024-12-15 03:05:27,076][HYDRA] Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 1 jobs\n", + "[2024-12-15 03:05:27,076][HYDRA] Launching jobs, sweep output dir : /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/.logs\n", + "[2024-12-15 03:05:27,076][HYDRA] \t#0 : stage=filter_measurements worker=0\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:29.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - 
\u001b[1mCommand error:\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/joblib/externals/loky/backend/fork_exec.py:38: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "In addition, using fork() with Python in general is a recipe for mysterious\n", + "deadlocks and crashes.\n", + "\n", + "The most likely reason you are seeing this error is because you are using the\n", + "multiprocessing module on Linux, which uses fork() by default. This will be\n", + "fixed in Python 3.14. Until then, you want to use the \"spawn\" context instead.\n", + "\n", + "See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.\n", + "\n", + "If you really know what your doing, you can silence this warning with the warning module\n", + "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", + "\n", + " pid = os.fork()\n", + "2024-12-15 03:05:28.012 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:28.013 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:28.076 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:28.079 | INFO | MEDS_transforms.utils:stage_init:73 - Running MEDS_transform-filter_measurements with the following configuration:\n", + "input_dir: ${oc.env:MEDS_DIR}\n", + "cohort_dir: ${oc.env:MODEL_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- fit_filter_and_occlude\n", + "- filter_measurements\n", + "- filter_subjects\n", + "- occlude_outliers\n", + "- fit_normalization\n", + "- fit_vocabulary_indices\n", + "- normalization\n", + "- tokenization\n", + "- tensorization\n", + "stage_configs:\n", + " reshard_to_split:\n", + " n_subjects_per_shard: 50000\n", + " filter_subjects:\n", + " min_events_per_subject: 0\n", + " min_measurements_per_subject: 0\n", + " add_time_derived_measurements:\n", + " age:\n", + " DOB_code: MEDS_BIRTH\n", + " age_code: AGE\n", + " age_unit: years\n", + " time_of_day:\n", + " time_of_day_code: TIME_OF_DAY\n", + " endpoints:\n", + " - 6\n", + " - 12\n", + " - 18\n", + " - 24\n", + " count_code_occurrences:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " do_summarize_over_all_codes: true\n", + " filter_measurements:\n", + " min_subjects_per_code: 0\n", + " min_occurrences_per_code: null\n", + " fit_outlier_detection:\n", + " aggregations:\n", + " - values/n_occurrences\n", + " - values/sum\n", + " - values/sum_sqd\n", + " occlude_outliers:\n", + " stddev_cutoff: 4.5\n", + " fit_normalization:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - name: values/quantiles\n", + " quantiles:\n", + " - 0.1\n", + " - 0.2\n", + " - 0.3\n", + " - 0.4\n", + " - 0.5\n", + " - 0.6\n", + " - 0.7\n", + " - 0.8\n", + " - 0.9\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " fit_vocabulary_indices:\n", + " is_metadata: true\n", + " ordering_method: lexicographic\n", + " reorder_measurements:\n", + " ordered_code_patterns: ???\n", + " fit_filter_and_occlude:\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - 
code/n_subjects\n", + " - values/sum\n", + " - values/sum_sqd\n", + " - values/n_occurrences\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: filter_measurements\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: ???\n", + " dataset_version: ???\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: preprocess\n", + "code_modifiers: ???\n", + "\n", + "2024-12-15 03:05:28.093 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "min_subjects_per_code: 0\n", + "min_occurrences_per_code: null\n", + "is_metadata: false\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements\n", + "reducer_output_dir: null\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements\n", + " - metadata_input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude\n", + "2024-12-15 03:05:28.108 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", + "2024-12-15 03:05:28.410 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/held_out/0.parquet\n", + "2024-12-15 
03:05:28.412 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:28.411732. Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:28.413 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/held_out/0.parquet\n", + "2024-12-15 03:05:28.414 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:28.416 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/held_out/0.parquet\n", + "2024-12-15 03:05:28.519 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.108043\n", + "2024-12-15 03:05:28.519 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/held_out/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/held_out/.0.parquet_cache/locks/2024-12-15T03:05:28.411732.json\n", + "2024-12-15 03:05:28.520 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/tuning/0.parquet\n", + "2024-12-15 03:05:28.521 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:28.520956. 
Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:28.521 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/tuning/0.parquet\n", + "2024-12-15 03:05:28.521 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:28.522 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/tuning/0.parquet\n", + "2024-12-15 03:05:28.682 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.161074\n", + "2024-12-15 03:05:28.682 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/tuning/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/tuning/.0.parquet_cache/locks/2024-12-15T03:05:28.520956.json\n", + "2024-12-15 03:05:28.682 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/train/0.parquet\n", + "2024-12-15 03:05:28.683 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:28.683424. 
Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:28.684 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/data/train/0.parquet\n", + "2024-12-15 03:05:28.684 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:28.684 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/train/0.parquet\n", + "2024-12-15 03:05:28.975 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.292014\n", + "2024-12-15 03:05:28.975 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/train/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/train/.0.parquet_cache/locks/2024-12-15T03:05:28.683424.json\n", + "2024-12-15 03:05:28.975 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.882129\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:29.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: filter_subjects\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:29.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_transform-filter_subjects --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds_torch_example/MIMICIV_INDUCTIVE_EXPERIMENTS/configs --config-name=triplet_config --multirun 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=filter_subjects worker=\"range(0,1)\" 
hydra/launcher=joblib\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:32.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "[2024-12-15 03:05:30,211][HYDRA] Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 1 jobs\n", + "[2024-12-15 03:05:30,211][HYDRA] Launching jobs, sweep output dir : /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/.logs\n", + "[2024-12-15 03:05:30,212][HYDRA] \t#0 : stage=filter_subjects worker=0\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:32.201\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/joblib/externals/loky/backend/fork_exec.py:38: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "In addition, using fork() with Python in general is a recipe for mysterious\n", + "deadlocks and crashes.\n", + "\n", + "The most likely reason you are seeing this error is because you are using the\n", + "multiprocessing module on Linux, which uses fork() by default. This will be\n", + "fixed in Python 3.14. Until then, you want to use the \"spawn\" context instead.\n", + "\n", + "See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.\n", + "\n", + "If you really know what your doing, you can silence this warning with the warning module\n", + "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", + "\n", + " pid = os.fork()\n", + "2024-12-15 03:05:31.206 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. 
Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:31.208 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:31.274 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:31.277 | INFO | MEDS_transforms.utils:stage_init:73 - Running MEDS_transform-filter_subjects with the following configuration:\n", + "input_dir: ${oc.env:MEDS_DIR}\n", + "cohort_dir: ${oc.env:MODEL_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- fit_filter_and_occlude\n", + "- filter_measurements\n", + "- filter_subjects\n", + "- occlude_outliers\n", + "- fit_normalization\n", + "- fit_vocabulary_indices\n", + "- normalization\n", + "- tokenization\n", + "- tensorization\n", + "stage_configs:\n", + " reshard_to_split:\n", + " n_subjects_per_shard: 50000\n", + " filter_subjects:\n", + " min_events_per_subject: 0\n", + " min_measurements_per_subject: 0\n", + " add_time_derived_measurements:\n", + " age:\n", + " DOB_code: MEDS_BIRTH\n", + " age_code: AGE\n", + " age_unit: years\n", + " time_of_day:\n", + " time_of_day_code: TIME_OF_DAY\n", + " endpoints:\n", + " - 6\n", + " - 12\n", + " - 18\n", + " - 24\n", + " count_code_occurrences:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " do_summarize_over_all_codes: true\n", + " filter_measurements:\n", + " min_subjects_per_code: 0\n", + " min_occurrences_per_code: null\n", + " fit_outlier_detection:\n", + " aggregations:\n", + " - values/n_occurrences\n", + " - values/sum\n", + " - values/sum_sqd\n", + " occlude_outliers:\n", + " stddev_cutoff: 4.5\n", + 
" fit_normalization:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - name: values/quantiles\n", + " quantiles:\n", + " - 0.1\n", + " - 0.2\n", + " - 0.3\n", + " - 0.4\n", + " - 0.5\n", + " - 0.6\n", + " - 0.7\n", + " - 0.8\n", + " - 0.9\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " fit_vocabulary_indices:\n", + " is_metadata: true\n", + " ordering_method: lexicographic\n", + " reorder_measurements:\n", + " ordered_code_patterns: ???\n", + " fit_filter_and_occlude:\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - values/sum\n", + " - values/sum_sqd\n", + " - values/n_occurrences\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: filter_subjects\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: ???\n", + " dataset_version: ???\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: preprocess\n", + "code_modifiers: ???\n", + "\n", + "2024-12-15 03:05:31.295 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "min_events_per_subject: 0\n", + "min_measurements_per_subject: 0\n", + "is_metadata: false\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects\n", + "reducer_output_dir: null\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements\n", + " - output_dir: ✅ 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects\n", + " - metadata_input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude\n", + "2024-12-15 03:05:31.315 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", + "2024-12-15 03:05:31.324 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/held_out/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/held_out/0.parquet\n", + "2024-12-15 03:05:31.325 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:31.325353. Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:31.327 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/held_out/0.parquet\n", + "2024-12-15 03:05:31.327 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:31.327 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/held_out/0.parquet\n", + "2024-12-15 03:05:31.425 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.100405\n", + "2024-12-15 03:05:31.425 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/held_out/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/held_out/.0.parquet_cache/locks/2024-12-15T03:05:31.325353.json\n", + 
"2024-12-15 03:05:31.426 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/tuning/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/tuning/0.parquet\n", + "2024-12-15 03:05:31.427 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:31.427065. Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:31.427 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/tuning/0.parquet\n", + "2024-12-15 03:05:31.427 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:31.427 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/tuning/0.parquet\n", + "2024-12-15 03:05:31.572 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.145061\n", + "2024-12-15 03:05:31.572 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/tuning/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/tuning/.0.parquet_cache/locks/2024-12-15T03:05:31.427065.json\n", + "2024-12-15 03:05:31.572 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/train/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/train/0.parquet\n", + "2024-12-15 03:05:31.573 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:31.573405. Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:31.573 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_measurements/train/0.parquet\n", + "2024-12-15 03:05:31.574 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:31.574 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/train/0.parquet\n", + "2024-12-15 03:05:31.841 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.268367\n", + "2024-12-15 03:05:31.842 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/train/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/train/.0.parquet_cache/locks/2024-12-15T03:05:31.573405.json\n", + "2024-12-15 03:05:31.842 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.546968\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:32.202\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: occlude_outliers\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:32.222\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_transform-occlude_outliers --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds_torch_example/MIMICIV_INDUCTIVE_EXPERIMENTS/configs --config-name=triplet_config --multirun 
'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=occlude_outliers worker=\"range(0,1)\" hydra/launcher=joblib\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:35.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "[2024-12-15 03:05:33,044][HYDRA] Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 1 jobs\n", + "[2024-12-15 03:05:33,044][HYDRA] Launching jobs, sweep output dir : /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/.logs\n", + "[2024-12-15 03:05:33,044][HYDRA] \t#0 : stage=occlude_outliers worker=0\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:35.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/joblib/externals/loky/backend/fork_exec.py:38: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "In addition, using fork() with Python in general is a recipe for mysterious\n", + "deadlocks and crashes.\n", + "\n", + "The most likely reason you are seeing this error is because you are using the\n", + "multiprocessing module on Linux, which uses fork() by default. This will be\n", + "fixed in Python 3.14. 
Until then, you want to use the \"spawn\" context instead.\n", + "\n", + "See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.\n", + "\n", + "If you really know what your doing, you can silence this warning with the warning module\n", + "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", + "\n", + " pid = os.fork()\n", + "2024-12-15 03:05:33.911 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:33.912 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:33.978 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:33.981 | INFO | MEDS_transforms.utils:stage_init:73 - Running MEDS_transform-occlude_outliers with the following configuration:\n", + "input_dir: ${oc.env:MEDS_DIR}\n", + "cohort_dir: ${oc.env:MODEL_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- fit_filter_and_occlude\n", + "- filter_measurements\n", + "- filter_subjects\n", + "- occlude_outliers\n", + "- fit_normalization\n", + "- fit_vocabulary_indices\n", + "- normalization\n", + "- tokenization\n", + "- tensorization\n", + "stage_configs:\n", + " reshard_to_split:\n", + " n_subjects_per_shard: 50000\n", + " filter_subjects:\n", + " min_events_per_subject: 0\n", + " min_measurements_per_subject: 0\n", + " add_time_derived_measurements:\n", + " age:\n", + " DOB_code: MEDS_BIRTH\n", + " age_code: AGE\n", + " age_unit: years\n", + " time_of_day:\n", + " time_of_day_code: TIME_OF_DAY\n", + " endpoints:\n", + " - 6\n", + " - 12\n", + " - 18\n", + " - 24\n", + " count_code_occurrences:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " do_summarize_over_all_codes: true\n", + " filter_measurements:\n", + " min_subjects_per_code: 0\n", + " min_occurrences_per_code: null\n", + " fit_outlier_detection:\n", + " aggregations:\n", + " - values/n_occurrences\n", + " - values/sum\n", + " - values/sum_sqd\n", + " occlude_outliers:\n", + " stddev_cutoff: 4.5\n", + " fit_normalization:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - name: values/quantiles\n", + " quantiles:\n", + " - 0.1\n", + " - 0.2\n", + " - 0.3\n", + " - 0.4\n", + " - 0.5\n", + " - 0.6\n", + " - 0.7\n", + " - 0.8\n", + " - 0.9\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " fit_vocabulary_indices:\n", + " is_metadata: true\n", + " ordering_method: lexicographic\n", + " reorder_measurements:\n", + " ordered_code_patterns: ???\n", + " fit_filter_and_occlude:\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - 
code/n_subjects\n", + " - values/sum\n", + " - values/sum_sqd\n", + " - values/n_occurrences\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: occlude_outliers\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: ???\n", + " dataset_version: ???\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: preprocess\n", + "code_modifiers: ???\n", + "\n", + "2024-12-15 03:05:34.002 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "stddev_cutoff: 4.5\n", + "is_metadata: false\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers\n", + "reducer_output_dir: null\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers\n", + " - metadata_input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude\n", + "2024-12-15 03:05:34.021 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", + "2024-12-15 03:05:34.308 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/held_out/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/held_out/0.parquet\n", + 
"2024-12-15 03:05:34.309 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:34.309599. Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:34.310 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/held_out/0.parquet\n", + "2024-12-15 03:05:34.311 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:34.311 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/held_out/0.parquet\n", + "2024-12-15 03:05:34.438 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.128524\n", + "2024-12-15 03:05:34.438 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/held_out/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/held_out/.0.parquet_cache/locks/2024-12-15T03:05:34.309599.json\n", + "2024-12-15 03:05:34.438 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/tuning/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/tuning/0.parquet\n", + "2024-12-15 03:05:34.439 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:34.439401. 
Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:34.439 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/tuning/0.parquet\n", + "2024-12-15 03:05:34.440 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:34.440 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/tuning/0.parquet\n", + "2024-12-15 03:05:34.608 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.168763\n", + "2024-12-15 03:05:34.608 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/tuning/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/tuning/.0.parquet_cache/locks/2024-12-15T03:05:34.439401.json\n", + "2024-12-15 03:05:34.608 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/train/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/train/0.parquet\n", + "2024-12-15 03:05:34.609 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:34.609560. 
Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:34.610 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/filter_subjects/train/0.parquet\n", + "2024-12-15 03:05:34.610 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:34.610 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/train/0.parquet\n", + "2024-12-15 03:05:34.919 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.309783\n", + "2024-12-15 03:05:34.919 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/train/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/train/.0.parquet_cache/locks/2024-12-15T03:05:34.609560.json\n", + "2024-12-15 03:05:34.919 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.917667\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:35.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: fit_normalization\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:35.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_transform-aggregate_code_metadata --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds_torch_example/MIMICIV_INDUCTIVE_EXPERIMENTS/configs --config-name=triplet_config --multirun 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=fit_normalization 
worker=\"range(0,1)\" hydra/launcher=joblib\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:37.822\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "[2024-12-15 03:05:36,236][HYDRA] Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 1 jobs\n", + "[2024-12-15 03:05:36,236][HYDRA] Launching jobs, sweep output dir : /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_normalization/.logs\n", + "[2024-12-15 03:05:36,236][HYDRA] \t#0 : stage=fit_normalization worker=0\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:37.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/joblib/externals/loky/backend/fork_exec.py:38: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "In addition, using fork() with Python in general is a recipe for mysterious\n", + "deadlocks and crashes.\n", + "\n", + "The most likely reason you are seeing this error is because you are using the\n", + "multiprocessing module on Linux, which uses fork() by default. This will be\n", + "fixed in Python 3.14. Until then, you want to use the \"spawn\" context instead.\n", + "\n", + "See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.\n", + "\n", + "If you really know what your doing, you can silence this warning with the warning module\n", + "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", + "\n", + " pid = os.fork()\n", + "2024-12-15 03:05:37.155 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. 
Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:37.156 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:37.220 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:37.224 | INFO | MEDS_transforms.utils:stage_init:73 - Running MEDS_transform-aggregate_code_metadata with the following configuration:\n", + "input_dir: ${oc.env:MEDS_DIR}\n", + "cohort_dir: ${oc.env:MODEL_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- fit_filter_and_occlude\n", + "- filter_measurements\n", + "- filter_subjects\n", + "- occlude_outliers\n", + "- fit_normalization\n", + "- fit_vocabulary_indices\n", + "- normalization\n", + "- tokenization\n", + "- tensorization\n", + "stage_configs:\n", + " reshard_to_split:\n", + " n_subjects_per_shard: 50000\n", + " filter_subjects:\n", + " min_events_per_subject: 0\n", + " min_measurements_per_subject: 0\n", + " add_time_derived_measurements:\n", + " age:\n", + " DOB_code: MEDS_BIRTH\n", + " age_code: AGE\n", + " age_unit: years\n", + " time_of_day:\n", + " time_of_day_code: TIME_OF_DAY\n", + " endpoints:\n", + " - 6\n", + " - 12\n", + " - 18\n", + " - 24\n", + " count_code_occurrences:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " do_summarize_over_all_codes: true\n", + " filter_measurements:\n", + " min_subjects_per_code: 0\n", + " min_occurrences_per_code: null\n", + " fit_outlier_detection:\n", + " aggregations:\n", + " - values/n_occurrences\n", + " - values/sum\n", + " - values/sum_sqd\n", + " occlude_outliers:\n", + " stddev_cutoff: 
4.5\n", + " fit_normalization:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - name: values/quantiles\n", + " quantiles:\n", + " - 0.1\n", + " - 0.2\n", + " - 0.3\n", + " - 0.4\n", + " - 0.5\n", + " - 0.6\n", + " - 0.7\n", + " - 0.8\n", + " - 0.9\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " fit_vocabulary_indices:\n", + " is_metadata: true\n", + " ordering_method: lexicographic\n", + " reorder_measurements:\n", + " ordered_code_patterns: ???\n", + " fit_filter_and_occlude:\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - values/sum\n", + " - values/sum_sqd\n", + " - values/n_occurrences\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: fit_normalization\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: ???\n", + " dataset_version: ???\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: preprocess\n", + "code_modifiers: ???\n", + "\n", + "2024-12-15 03:05:37.247 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "aggregations:\n", + "- code/n_occurrences\n", + "- code/n_subjects\n", + "- name: values/quantiles\n", + " quantiles:\n", + " - 0.1\n", + " - 0.2\n", + " - 0.3\n", + " - 0.4\n", + " - 0.5\n", + " - 0.6\n", + " - 0.7\n", + " - 0.8\n", + " - 0.9\n", + "_script: MEDS_transform-aggregate_code_metadata\n", + "is_metadata: true\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude\n", + "output_dir: 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_normalization\n", + "reducer_output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_normalization\n", + "train_only: true\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_normalization\n", + " - metadata_input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude\n", + "2024-12-15 03:05:37.271 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 1 shards\n", + "2024-12-15 03:05:37.271 | INFO | MEDS_transforms.mapreduce.mapper:map_over:644 - Processing train split only via shard prefix. Not filtering with /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/metadata/subject_splits.parquet.\n", + "2024-12-15 03:05:37.283 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/train/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_normalization/train/0.parquet\n", + "2024-12-15 03:05:37.284 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:37.283928. 
Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:37.285 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/train/0.parquet\n", + "2024-12-15 03:05:37.286 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:37.287 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_normalization/train/0.parquet\n", + "2024-12-15 03:05:37.364 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.080875\n", + "2024-12-15 03:05:37.365 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_normalization/train/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_normalization/train/.0.parquet_cache/locks/2024-12-15T03:05:37.283928.json\n", + "2024-12-15 03:05:37.365 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.118095\n", + "2024-12-15 03:05:37.365 | INFO | MEDS_transforms.aggregate_code_metadata:run_map_reduce:757 - Starting reduction process\n", + "2024-12-15 03:05:37.366 | INFO | MEDS_transforms.aggregate_code_metadata:run_map_reduce:764 - All map shards complete! Starting code metadata reduction computation.\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/aggregate_code_metadata.py:734: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. 
Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " if agg not in df.columns:\n", + "2024-12-15 03:05:37.395 | INFO | MEDS_transforms.aggregate_code_metadata:run_map_reduce:777 - Joining to existing code metadata at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_filter_and_occlude/codes.parquet\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/MEDS_transforms/aggregate_code_metadata.py:779: PerformanceWarning: Determining the column names of a LazyFrame requires resolving its schema, which is a potentially expensive operation. Use `LazyFrame.collect_schema().names()` to get the column names without this warning.\n", + " existing = existing.drop(*[c for c in existing.columns if c in set(reduced.columns) - set(join_cols)])\n", + "2024-12-15 03:05:37.506 | INFO | MEDS_transforms.aggregate_code_metadata:run_map_reduce:783 - Finished reduction in 0:00:00.140397\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:37.824\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: fit_vocabulary_indices\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:37.844\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_transform-fit_vocabulary_indices --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds_torch_example/MIMICIV_INDUCTIVE_EXPERIMENTS/configs --config-name=triplet_config --multirun 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=fit_vocabulary_indices worker=\"range(0,1)\" hydra/launcher=joblib\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:40.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "[2024-12-15 03:05:38,580][HYDRA] 
Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 1 jobs\n", + "[2024-12-15 03:05:38,581][HYDRA] Launching jobs, sweep output dir : /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_vocabulary_indices/.logs\n", + "[2024-12-15 03:05:38,581][HYDRA] \t#0 : stage=fit_vocabulary_indices worker=0\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:40.262\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/joblib/externals/loky/backend/fork_exec.py:38: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "In addition, using fork() with Python in general is a recipe for mysterious\n", + "deadlocks and crashes.\n", + "\n", + "The most likely reason you are seeing this error is because you are using the\n", + "multiprocessing module on Linux, which uses fork() by default. This will be\n", + "fixed in Python 3.14. Until then, you want to use the \"spawn\" context instead.\n", + "\n", + "See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.\n", + "\n", + "If you really know what your doing, you can silence this warning with the warning module\n", + "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", + "\n", + " pid = os.fork()\n", + "2024-12-15 03:05:39.458 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:39.459 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. 
Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:39.531 | INFO | MEDS_transforms.fit_vocabulary_indices:main:202 - Running with config:\n", + "input_dir: ${oc.env:MEDS_DIR}\n", + "cohort_dir: ${oc.env:MODEL_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- fit_filter_and_occlude\n", + "- filter_measurements\n", + "- filter_subjects\n", + "- occlude_outliers\n", + "- fit_normalization\n", + "- fit_vocabulary_indices\n", + "- normalization\n", + "- tokenization\n", + "- tensorization\n", + "stage_configs:\n", + " reshard_to_split:\n", + " n_subjects_per_shard: 50000\n", + " filter_subjects:\n", + " min_events_per_subject: 0\n", + " min_measurements_per_subject: 0\n", + " add_time_derived_measurements:\n", + " age:\n", + " DOB_code: MEDS_BIRTH\n", + " age_code: AGE\n", + " age_unit: years\n", + " time_of_day:\n", + " time_of_day_code: TIME_OF_DAY\n", + " endpoints:\n", + " - 6\n", + " - 12\n", + " - 18\n", + " - 24\n", + " count_code_occurrences:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " do_summarize_over_all_codes: true\n", + " filter_measurements:\n", + " min_subjects_per_code: 0\n", + " min_occurrences_per_code: null\n", + " fit_outlier_detection:\n", + " aggregations:\n", + " - values/n_occurrences\n", + " - values/sum\n", + " - values/sum_sqd\n", + " occlude_outliers:\n", + " stddev_cutoff: 4.5\n", + " fit_normalization:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - name: values/quantiles\n", + " quantiles:\n", + " - 0.1\n", + " - 0.2\n", + " - 0.3\n", + " - 0.4\n", + " - 0.5\n", + " - 0.6\n", + " - 0.7\n", + " - 0.8\n", + " - 0.9\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " fit_vocabulary_indices:\n", + " 
is_metadata: true\n", + " ordering_method: lexicographic\n", + " reorder_measurements:\n", + " ordered_code_patterns: ???\n", + " fit_filter_and_occlude:\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - values/sum\n", + " - values/sum_sqd\n", + " - values/n_occurrences\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: fit_vocabulary_indices\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: ???\n", + " dataset_version: ???\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: preprocess\n", + "code_modifiers: ???\n", + "\n", + "Stage: fit_vocabulary_indices\n", + "\n", + "Stage config:\n", + "is_metadata: true\n", + "ordering_method: lexicographic\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_normalization\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/fit_vocabulary_indices\n", + "reducer_output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/metadata\n", + "train_only: true\n", + "\n", + "2024-12-15 03:05:39.847 | INFO | MEDS_transforms.fit_vocabulary_indices:main:221 - Assigning code vocabulary indices via a lexicographic order.\n", + "2024-12-15 03:05:39.861 | INFO | MEDS_transforms.fit_vocabulary_indices:main:232 - Indices assigned. 
Writing to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/metadata/codes.parquet\n", + "2024-12-15 03:05:39.883 | INFO | MEDS_transforms.fit_vocabulary_indices:main:236 - Done with fit_vocabulary_indices\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:40.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: normalization\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:40.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_transform-normalization --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds_torch_example/MIMICIV_INDUCTIVE_EXPERIMENTS/configs --config-name=triplet_config --multirun 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=normalization worker=\"range(0,1)\" hydra/launcher=joblib\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:42.946\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "[2024-12-15 03:05:41,058][HYDRA] Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 1 jobs\n", + "[2024-12-15 03:05:41,058][HYDRA] Launching jobs, sweep output dir : /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/.logs\n", + "[2024-12-15 03:05:41,058][HYDRA] \t#0 : stage=normalization worker=0\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:42.947\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + 
"/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/joblib/externals/loky/backend/fork_exec.py:38: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "In addition, using fork() with Python in general is a recipe for mysterious\n", + "deadlocks and crashes.\n", + "\n", + "The most likely reason you are seeing this error is because you are using the\n", + "multiprocessing module on Linux, which uses fork() by default. This will be\n", + "fixed in Python 3.14. Until then, you want to use the \"spawn\" context instead.\n", + "\n", + "See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.\n", + "\n", + "If you really know what your doing, you can silence this warning with the warning module\n", + "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", + "\n", + " pid = os.fork()\n", + "2024-12-15 03:05:41.964 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:41.966 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:42.029 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:42.032 | INFO | MEDS_transforms.utils:stage_init:73 - Running MEDS_transform-normalization with the following configuration:\n", + "input_dir: ${oc.env:MEDS_DIR}\n", + "cohort_dir: ${oc.env:MODEL_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. 
Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- fit_filter_and_occlude\n", + "- filter_measurements\n", + "- filter_subjects\n", + "- occlude_outliers\n", + "- fit_normalization\n", + "- fit_vocabulary_indices\n", + "- normalization\n", + "- tokenization\n", + "- tensorization\n", + "stage_configs:\n", + " reshard_to_split:\n", + " n_subjects_per_shard: 50000\n", + " filter_subjects:\n", + " min_events_per_subject: 0\n", + " min_measurements_per_subject: 0\n", + " add_time_derived_measurements:\n", + " age:\n", + " DOB_code: MEDS_BIRTH\n", + " age_code: AGE\n", + " age_unit: years\n", + " time_of_day:\n", + " time_of_day_code: TIME_OF_DAY\n", + " endpoints:\n", + " - 6\n", + " - 12\n", + " - 18\n", + " - 24\n", + " count_code_occurrences:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " do_summarize_over_all_codes: true\n", + " filter_measurements:\n", + " min_subjects_per_code: 0\n", + " min_occurrences_per_code: null\n", + " fit_outlier_detection:\n", + " aggregations:\n", + " - values/n_occurrences\n", + " - values/sum\n", + " - values/sum_sqd\n", + " occlude_outliers:\n", + " stddev_cutoff: 4.5\n", + " fit_normalization:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - name: values/quantiles\n", + " quantiles:\n", + " - 0.1\n", + " - 0.2\n", + " - 0.3\n", + " - 0.4\n", + " - 0.5\n", + " - 0.6\n", + " - 0.7\n", + " - 0.8\n", + " - 0.9\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " fit_vocabulary_indices:\n", + " is_metadata: true\n", + " ordering_method: lexicographic\n", + " reorder_measurements:\n", + " ordered_code_patterns: ???\n", + " fit_filter_and_occlude:\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - 
code/n_subjects\n", + " - values/sum\n", + " - values/sum_sqd\n", + " - values/n_occurrences\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: normalization\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: ???\n", + " dataset_version: ???\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: preprocess\n", + "code_modifiers: ???\n", + "\n", + "2024-12-15 03:05:42.056 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "is_metadata: false\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/metadata\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization\n", + "reducer_output_dir: null\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization\n", + " - metadata_input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/metadata\n", + "2024-12-15 03:05:42.080 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", + "2024-12-15 03:05:42.343 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/held_out/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/held_out/0.parquet\n", + "2024-12-15 03:05:42.344 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:42.344771. Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:42.345 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/held_out/0.parquet\n", + "2024-12-15 03:05:42.346 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:42.348 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/held_out/0.parquet\n", + "2024-12-15 03:05:42.407 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.062418\n", + "2024-12-15 03:05:42.407 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/held_out/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/held_out/.0.parquet_cache/locks/2024-12-15T03:05:42.344771.json\n", + "2024-12-15 03:05:42.407 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/tuning/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/tuning/0.parquet\n", + "2024-12-15 03:05:42.408 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:42.408412. 
Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:42.408 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/tuning/0.parquet\n", + "2024-12-15 03:05:42.409 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:42.409 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/tuning/0.parquet\n", + "2024-12-15 03:05:42.476 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.067944\n", + "2024-12-15 03:05:42.476 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/tuning/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/tuning/.0.parquet_cache/locks/2024-12-15T03:05:42.408412.json\n", + "2024-12-15 03:05:42.477 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/train/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/train/0.parquet\n", + "2024-12-15 03:05:42.477 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:42.477667. 
Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:42.478 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/occlude_outliers/train/0.parquet\n", + "2024-12-15 03:05:42.478 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:42.479 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/train/0.parquet\n", + "2024-12-15 03:05:42.585 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.107916\n", + "2024-12-15 03:05:42.585 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/train/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/train/.0.parquet_cache/locks/2024-12-15T03:05:42.477667.json\n", + "2024-12-15 03:05:42.586 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:00.529857\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:42.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: tokenization\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:42.967\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_transform-tokenization --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds_torch_example/MIMICIV_INDUCTIVE_EXPERIMENTS/configs --config-name=triplet_config --multirun 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=tokenization worker=\"range(0,1)\" 
hydra/launcher=joblib\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:45.730\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "[2024-12-15 03:05:43,775][HYDRA] Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 1 jobs\n", + "[2024-12-15 03:05:43,775][HYDRA] Launching jobs, sweep output dir : /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/.logs\n", + "[2024-12-15 03:05:43,775][HYDRA] \t#0 : stage=tokenization worker=0\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:45.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/joblib/externals/loky/backend/fork_exec.py:38: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "In addition, using fork() with Python in general is a recipe for mysterious\n", + "deadlocks and crashes.\n", + "\n", + "The most likely reason you are seeing this error is because you are using the\n", + "multiprocessing module on Linux, which uses fork() by default. This will be\n", + "fixed in Python 3.14. Until then, you want to use the \"spawn\" context instead.\n", + "\n", + "See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.\n", + "\n", + "If you really know what your doing, you can silence this warning with the warning module\n", + "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", + "\n", + " pid = os.fork()\n", + "2024-12-15 03:05:44.865 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. 
Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:44.867 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:44.941 | INFO | MEDS_transforms.transforms.tokenization:main:257 - Running with config:\n", + "input_dir: ${oc.env:MEDS_DIR}\n", + "cohort_dir: ${oc.env:MODEL_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- fit_filter_and_occlude\n", + "- filter_measurements\n", + "- filter_subjects\n", + "- occlude_outliers\n", + "- fit_normalization\n", + "- fit_vocabulary_indices\n", + "- normalization\n", + "- tokenization\n", + "- tensorization\n", + "stage_configs:\n", + " reshard_to_split:\n", + " n_subjects_per_shard: 50000\n", + " filter_subjects:\n", + " min_events_per_subject: 0\n", + " min_measurements_per_subject: 0\n", + " add_time_derived_measurements:\n", + " age:\n", + " DOB_code: MEDS_BIRTH\n", + " age_code: AGE\n", + " age_unit: years\n", + " time_of_day:\n", + " time_of_day_code: TIME_OF_DAY\n", + " endpoints:\n", + " - 6\n", + " - 12\n", + " - 18\n", + " - 24\n", + " count_code_occurrences:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " do_summarize_over_all_codes: true\n", + " filter_measurements:\n", + " min_subjects_per_code: 0\n", + " min_occurrences_per_code: null\n", + " fit_outlier_detection:\n", + " aggregations:\n", + " - values/n_occurrences\n", + " - values/sum\n", + " - values/sum_sqd\n", + " occlude_outliers:\n", + " stddev_cutoff: 4.5\n", + " fit_normalization:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - name: values/quantiles\n", + " quantiles:\n", + " - 0.1\n", + " - 0.2\n", + " - 0.3\n", + " - 
0.4\n", + " - 0.5\n", + " - 0.6\n", + " - 0.7\n", + " - 0.8\n", + " - 0.9\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " fit_vocabulary_indices:\n", + " is_metadata: true\n", + " ordering_method: lexicographic\n", + " reorder_measurements:\n", + " ordered_code_patterns: ???\n", + " fit_filter_and_occlude:\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - values/sum\n", + " - values/sum_sqd\n", + " - values/n_occurrences\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: tokenization\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: ???\n", + " dataset_version: ???\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: preprocess\n", + "code_modifiers: ???\n", + "\n", + "Stage: tokenization\n", + "\n", + "Stage config:\n", + "is_metadata: false\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/metadata\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization\n", + "reducer_output_dir: null\n", + "\n", + "2024-12-15 03:05:44.973 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", + "2024-12-15 03:05:44.974 | INFO | MEDS_transforms.transforms.tokenization:main:274 - Tokenizing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/held_out/0.parquet into schemas at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/schemas/held_out/0.parquet\n", + "2024-12-15 03:05:44.974 | INFO | 
MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:44.974697. Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:44.976 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/held_out/0.parquet\n", + "2024-12-15 03:05:44.976 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:44.978 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/schemas/held_out/0.parquet\n", + "2024-12-15 03:05:45.020 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.045742\n", + "2024-12-15 03:05:45.020 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/schemas/held_out/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/schemas/held_out/.0.parquet_cache/locks/2024-12-15T03:05:44.974697.json\n", + "2024-12-15 03:05:45.021 | INFO | MEDS_transforms.transforms.tokenization:main:285 - Tokenizing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/held_out/0.parquet into event_seqs at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/held_out/0.parquet\n", + "2024-12-15 03:05:45.021 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:45.021494. 
Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:45.021 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/held_out/0.parquet\n", + "2024-12-15 03:05:45.022 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:45.022 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/held_out/0.parquet\n", + "2024-12-15 03:05:45.067 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.046403\n", + "2024-12-15 03:05:45.068 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/held_out/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/held_out/.0.parquet_cache/locks/2024-12-15T03:05:45.021494.json\n", + "2024-12-15 03:05:45.068 | INFO | MEDS_transforms.transforms.tokenization:main:274 - Tokenizing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/tuning/0.parquet into schemas at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/schemas/tuning/0.parquet\n", + "2024-12-15 03:05:45.069 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:45.069143. 
Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:45.069 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/tuning/0.parquet\n", + "2024-12-15 03:05:45.069 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:45.070 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/schemas/tuning/0.parquet\n", + "2024-12-15 03:05:45.105 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.036594\n", + "2024-12-15 03:05:45.105 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/schemas/tuning/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/schemas/tuning/.0.parquet_cache/locks/2024-12-15T03:05:45.069143.json\n", + "2024-12-15 03:05:45.106 | INFO | MEDS_transforms.transforms.tokenization:main:285 - Tokenizing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/tuning/0.parquet into event_seqs at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/tuning/0.parquet\n", + "2024-12-15 03:05:45.107 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:45.106895. 
Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:45.107 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/tuning/0.parquet\n", + "2024-12-15 03:05:45.107 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:45.108 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/tuning/0.parquet\n", + "2024-12-15 03:05:45.180 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.073530\n", + "2024-12-15 03:05:45.180 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/tuning/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/tuning/.0.parquet_cache/locks/2024-12-15T03:05:45.106895.json\n", + "2024-12-15 03:05:45.181 | INFO | MEDS_transforms.transforms.tokenization:main:274 - Tokenizing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/train/0.parquet into schemas at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/schemas/train/0.parquet\n", + "2024-12-15 03:05:45.181 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:45.181718. 
Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:45.182 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/train/0.parquet\n", + "2024-12-15 03:05:45.182 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:45.182 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/schemas/train/0.parquet\n", + "2024-12-15 03:05:45.242 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.060453\n", + "2024-12-15 03:05:45.242 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/schemas/train/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/schemas/train/.0.parquet_cache/locks/2024-12-15T03:05:45.181718.json\n", + "2024-12-15 03:05:45.242 | INFO | MEDS_transforms.transforms.tokenization:main:285 - Tokenizing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/train/0.parquet into event_seqs at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/train/0.parquet\n", + "2024-12-15 03:05:45.243 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:45.243306. 
Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:45.243 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/normalization/train/0.parquet\n", + "2024-12-15 03:05:45.243 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:45.244 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/train/0.parquet\n", + "2024-12-15 03:05:45.359 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.116516\n", + "2024-12-15 03:05:45.360 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/train/.0.parquet_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/train/.0.parquet_cache/locks/2024-12-15T03:05:45.243306.json\n", + "2024-12-15 03:05:45.360 | INFO | MEDS_transforms.transforms.tokenization:main:296 - Done with tokenization\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:45.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mmain\u001b[0m:\u001b[36m326\u001b[0m - \u001b[1mRunning stage: tensorization\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:45.754\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m255\u001b[0m - \u001b[1mRunning command: MEDS_transform-tensorization --config-dir=/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds_torch_example/MIMICIV_INDUCTIVE_EXPERIMENTS/configs --config-name=triplet_config --multirun 'hydra.searchpath=[pkg://MEDS_transforms.configs]' stage=tensorization worker=\"range(0,1)\" 
hydra/launcher=joblib\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:48.912\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m263\u001b[0m - \u001b[1mCommand output:\n", + "[2024-12-15 03:05:46,544][HYDRA] Joblib.Parallel(n_jobs=-1,backend=loky,prefer=processes,require=None,verbose=0,timeout=None,pre_dispatch=2*n_jobs,batch_size=auto,temp_folder=None,max_nbytes=None,mmap_mode=r) is launching 1 jobs\n", + "[2024-12-15 03:05:46,544][HYDRA] Launching jobs, sweep output dir : /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/data/.logs\n", + "[2024-12-15 03:05:46,544][HYDRA] \t#0 : stage=tensorization worker=0\n", + "\u001b[0m\n", + "\u001b[32m2024-12-15 03:05:48.912\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mMEDS_transforms.runner\u001b[0m:\u001b[36mrun_stage\u001b[0m:\u001b[36m264\u001b[0m - \u001b[1mCommand error:\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/joblib/externals/loky/backend/fork_exec.py:38: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "In addition, using fork() with Python in general is a recipe for mysterious\n", + "deadlocks and crashes.\n", + "\n", + "The most likely reason you are seeing this error is because you are using the\n", + "multiprocessing module on Linux, which uses fork() by default. This will be\n", + "fixed in Python 3.14. Until then, you want to use the \"spawn\" context instead.\n", + "\n", + "See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.\n", + "\n", + "If you really know what your doing, you can silence this warning with the warning module\n", + "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", + "\n", + " pid = os.fork()\n", + "2024-12-15 03:05:47.443 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. 
Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:47.444 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:47.507 | WARNING | MEDS_transforms.utils:current_script_name:151 - Can't find main function in __main__ module. Using sys.argv[0] as a fallback.\n", + "2024-12-15 03:05:47.510 | INFO | MEDS_transforms.utils:stage_init:73 - Running MEDS_transform-tensorization with the following configuration:\n", + "input_dir: ${oc.env:MEDS_DIR}\n", + "cohort_dir: ${oc.env:MODEL_DIR}\n", + "_default_description: 'This is a MEDS pipeline ETL. Please set a more detailed description\n", + " at the top of your specific pipeline\n", + "\n", + " configuration file.'\n", + "log_dir: ${stage_cfg.output_dir}/.logs\n", + "do_overwrite: false\n", + "seed: 1\n", + "stages:\n", + "- fit_filter_and_occlude\n", + "- filter_measurements\n", + "- filter_subjects\n", + "- occlude_outliers\n", + "- fit_normalization\n", + "- fit_vocabulary_indices\n", + "- normalization\n", + "- tokenization\n", + "- tensorization\n", + "stage_configs:\n", + " reshard_to_split:\n", + " n_subjects_per_shard: 50000\n", + " filter_subjects:\n", + " min_events_per_subject: 0\n", + " min_measurements_per_subject: 0\n", + " add_time_derived_measurements:\n", + " age:\n", + " DOB_code: MEDS_BIRTH\n", + " age_code: AGE\n", + " age_unit: years\n", + " time_of_day:\n", + " time_of_day_code: TIME_OF_DAY\n", + " endpoints:\n", + " - 6\n", + " - 12\n", + " - 18\n", + " - 24\n", + " count_code_occurrences:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " do_summarize_over_all_codes: true\n", + " filter_measurements:\n", + " min_subjects_per_code: 0\n", + " min_occurrences_per_code: null\n", + " fit_outlier_detection:\n", + " aggregations:\n", + " - values/n_occurrences\n", + " - values/sum\n", + " - values/sum_sqd\n", + " occlude_outliers:\n", + " stddev_cutoff: 4.5\n", + " 
fit_normalization:\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - name: values/quantiles\n", + " quantiles:\n", + " - 0.1\n", + " - 0.2\n", + " - 0.3\n", + " - 0.4\n", + " - 0.5\n", + " - 0.6\n", + " - 0.7\n", + " - 0.8\n", + " - 0.9\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " fit_vocabulary_indices:\n", + " is_metadata: true\n", + " ordering_method: lexicographic\n", + " reorder_measurements:\n", + " ordered_code_patterns: ???\n", + " fit_filter_and_occlude:\n", + " _script: MEDS_transform-aggregate_code_metadata\n", + " aggregations:\n", + " - code/n_occurrences\n", + " - code/n_subjects\n", + " - values/sum\n", + " - values/sum_sqd\n", + " - values/n_occurrences\n", + "worker: 0\n", + "polling_time: 300\n", + "stage: tensorization\n", + "stage_cfg: ${oc.create:${populate_stage:${stage}, ${input_dir}, ${cohort_dir}, ${stages},\n", + " ${stage_configs}}}\n", + "etl_metadata:\n", + " pipeline_name: ???\n", + " dataset_name: ???\n", + " dataset_version: ???\n", + " package_name: ${get_package_name:}\n", + " package_version: ${get_package_version:}\n", + "etl_metadata.pipeline_name: preprocess\n", + "code_modifiers: ???\n", + "\n", + "2024-12-15 03:05:47.534 | DEBUG | MEDS_transforms.utils:stage_init:97 - Stage config:\n", + "is_metadata: false\n", + "data_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization\n", + "metadata_input_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/metadata\n", + "output_dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/data\n", + "reducer_output_dir: null\n", + "\n", + "Paths: (checkbox indicates if it exists)\n", + " - input_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization\n", + " - output_dir: ✅ /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/data\n", + " - metadata_input_dir: ✅ 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/metadata\n", + "2024-12-15 03:05:47.561 | INFO | MEDS_transforms.mapreduce.utils:shard_iterator:600 - Mapping computation over a maximum of 3 shards\n", + "2024-12-15 03:05:47.573 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/held_out/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/data/held_out/0.nrt\n", + "2024-12-15 03:05:47.574 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:47.574257. Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:47.576 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/held_out/0.parquet\n", + "2024-12-15 03:05:47.581 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:47.738 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/data/held_out/0.nrt\n", + "2024-12-15 03:05:47.739 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.165529\n", + "2024-12-15 03:05:47.739 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/data/held_out/.0.nrt_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/data/held_out/.0.nrt_cache/locks/2024-12-15T03:05:47.574257.json\n", + "2024-12-15 03:05:47.740 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/tuning/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/data/tuning/0.nrt\n", + "2024-12-15 03:05:47.740 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:47.740699. Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:47.741 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/tuning/0.parquet\n", + "2024-12-15 03:05:47.741 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:48.016 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/data/tuning/0.nrt\n", + "2024-12-15 03:05:48.018 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.278052\n", + "2024-12-15 03:05:48.018 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/data/tuning/.0.nrt_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/data/tuning/.0.nrt_cache/locks/2024-12-15T03:05:47.740699.json\n", + "2024-12-15 03:05:48.019 | INFO | MEDS_transforms.mapreduce.mapper:map_over:672 - Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/train/0.parquet into /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/data/train/0.nrt\n", + "2024-12-15 03:05:48.020 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:307 - Registered lock at 2024-12-15 03:05:48.019959. 
Double checking no earlier locks have been registered.\n", + "2024-12-15 03:05:48.020 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:314 - Reading input dataframe from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/tokenization/event_seqs/train/0.parquet\n", + "2024-12-15 03:05:48.020 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:316 - Read dataset\n", + "2024-12-15 03:05:48.586 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:328 - Writing final output to /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/data/train/0.nrt\n", + "2024-12-15 03:05:48.589 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:330 - Succeeded in 0:00:00.569773\n", + "2024-12-15 03:05:48.589 | INFO | MEDS_transforms.mapreduce.utils:rwlock_wrap:331 - Leaving cache directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/data/train/.0.nrt_cache, but clearing lock at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/triplet_tensors/data/train/.0.nrt_cache/locks/2024-12-15T03:05:48.019959.json\n", + "2024-12-15 03:05:48.590 | INFO | MEDS_transforms.mapreduce.mapper:map_over:683 - Finished mapping in 0:00:01.055565\n", + "\u001b[0m\n" + ] + } + ], + "source": [ + "!bash \"$TUTORIAL_DIR/tokenize.sh\" \"$MEDS_DIR\" \"$TENSOR_DIR\" \"$N_PARALLEL_WORKERS\" \"$PIPELINE_CONFIG_PATH\" \"stage_runner_fp=$JOBLIB_RUNNER_CONFIG_PATH\"" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "background_save": true + }, + "id": "JzE605XLZ76s", + "outputId": "cb843d6d-f013-4c68-bd60-9b4bbc32cee5" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (5_577, 13)
codecode/n_occurrencescode/n_subjectsvalues/quantilesvalues/sumvalues/sum_sqdvalues/n_occurrencesdescriptionparent_codespossibly_cpt_codeitemidvalueuomcode/vocab_index
stru16u8struct[9]f32f32u16strlist[str]list[str]list[str]list[str]u16
"BMI (kg/m2)"39134{null,null,null,null,null,null,null,null,null}0.00.00nullnullnullnullnull1
"Blood Pressure"42632{null,null,null,null,null,null,null,null,null}0.00.00nullnullnullnullnull2
"Blood Pressure Lying"11{null,null,null,null,null,null,null,null,null}0.00.00nullnullnullnullnull3
"Blood Pressure Sitting"11{null,null,null,null,null,null,null,null,null}0.00.00nullnullnullnullnull4
"Blood Pressure Standing (3 min…11{null,null,null,null,null,null,null,null,null}0.00.00nullnullnullnullnull5
"TRANSFER_TO//transfer//Surgica…66{null,null,null,null,null,null,null,null,null}0.00.00nullnullnullnullnull5573
"TRANSFER_TO//transfer//Transpl…114{null,null,null,null,null,null,null,null,null}0.00.00nullnullnullnullnull5574
"TRANSFER_TO//transfer//Trauma …87{null,null,null,null,null,null,null,null,null}0.00.00nullnullnullnullnull5575
"TRANSFER_TO//transfer//Vascula…83{null,null,null,null,null,null,null,null,null}0.00.00nullnullnullnullnull5576
"Weight (Lbs)"40436{null,null,null,null,null,null,null,null,null}0.00.00nullnullnullnullnull5577
" + ], + "text/plain": [ + "shape: (5_577, 13)\n", + "┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐\n", + "│ code ┆ code/n_oc ┆ code/n_su ┆ values/qu ┆ … ┆ possibly_ ┆ itemid ┆ valueuom ┆ code/voc │\n", + "│ --- ┆ currences ┆ bjects ┆ antiles ┆ ┆ cpt_code ┆ --- ┆ --- ┆ ab_index │\n", + "│ str ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ list[str] ┆ list[str] ┆ --- │\n", + "│ ┆ u16 ┆ u8 ┆ struct[9] ┆ ┆ list[str] ┆ ┆ ┆ u16 │\n", + "╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡\n", + "│ BMI ┆ 391 ┆ 34 ┆ {null,nul ┆ … ┆ null ┆ null ┆ null ┆ 1 │\n", + "│ (kg/m2) ┆ ┆ ┆ l,null,nu ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ ll,null,n ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ ull… ┆ ┆ ┆ ┆ ┆ │\n", + "│ Blood ┆ 426 ┆ 32 ┆ {null,nul ┆ … ┆ null ┆ null ┆ null ┆ 2 │\n", + "│ Pressure ┆ ┆ ┆ l,null,nu ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ ll,null,n ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ ull… ┆ ┆ ┆ ┆ ┆ │\n", + "│ Blood ┆ 1 ┆ 1 ┆ {null,nul ┆ … ┆ null ┆ null ┆ null ┆ 3 │\n", + "│ Pressure ┆ ┆ ┆ l,null,nu ┆ ┆ ┆ ┆ ┆ │\n", + "│ Lying ┆ ┆ ┆ ll,null,n ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ ull… ┆ ┆ ┆ ┆ ┆ │\n", + "│ Blood ┆ 1 ┆ 1 ┆ {null,nul ┆ … ┆ null ┆ null ┆ null ┆ 4 │\n", + "│ Pressure ┆ ┆ ┆ l,null,nu ┆ ┆ ┆ ┆ ┆ │\n", + "│ Sitting ┆ ┆ ┆ ll,null,n ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ ull… ┆ ┆ ┆ ┆ ┆ │\n", + "│ Blood ┆ 1 ┆ 1 ┆ {null,nul ┆ … ┆ null ┆ null ┆ null ┆ 5 │\n", + "│ Pressure ┆ ┆ ┆ l,null,nu ┆ ┆ ┆ ┆ ┆ │\n", + "│ Standing ┆ ┆ ┆ ll,null,n ┆ ┆ ┆ ┆ ┆ │\n", + "│ (3 min… ┆ ┆ ┆ ull… ┆ ┆ ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ TRANSFER_ ┆ 6 ┆ 6 ┆ {null,nul ┆ … ┆ null ┆ null ┆ null ┆ 5573 │\n", + "│ TO//trans ┆ ┆ ┆ l,null,nu ┆ ┆ ┆ ┆ ┆ │\n", + "│ fer//Surg ┆ ┆ ┆ ll,null,n ┆ ┆ ┆ ┆ ┆ │\n", + "│ ica… ┆ ┆ ┆ ull… ┆ ┆ ┆ ┆ ┆ │\n", + "│ TRANSFER_ ┆ 11 ┆ 4 ┆ {null,nul ┆ … ┆ null ┆ null ┆ null ┆ 5574 │\n", + "│ TO//trans ┆ ┆ ┆ l,null,nu ┆ ┆ ┆ ┆ ┆ │\n", + "│ fer//Tran ┆ ┆ ┆ ll,null,n ┆ ┆ ┆ ┆ ┆ │\n", + "│ spl… ┆ ┆ ┆ ull… ┆ ┆ ┆ ┆ ┆ │\n", + "│ TRANSFER_ ┆ 8 ┆ 7 ┆ 
{null,nul ┆ … ┆ null ┆ null ┆ null ┆ 5575 │\n", + "│ TO//trans ┆ ┆ ┆ l,null,nu ┆ ┆ ┆ ┆ ┆ │\n", + "│ fer//Trau ┆ ┆ ┆ ll,null,n ┆ ┆ ┆ ┆ ┆ │\n", + "│ ma … ┆ ┆ ┆ ull… ┆ ┆ ┆ ┆ ┆ │\n", + "│ TRANSFER_ ┆ 8 ┆ 3 ┆ {null,nul ┆ … ┆ null ┆ null ┆ null ┆ 5576 │\n", + "│ TO//trans ┆ ┆ ┆ l,null,nu ┆ ┆ ┆ ┆ ┆ │\n", + "│ fer//Vasc ┆ ┆ ┆ ll,null,n ┆ ┆ ┆ ┆ ┆ │\n", + "│ ula… ┆ ┆ ┆ ull… ┆ ┆ ┆ ┆ ┆ │\n", + "│ Weight ┆ 404 ┆ 36 ┆ {null,nul ┆ … ┆ null ┆ null ┆ null ┆ 5577 │\n", + "│ (Lbs) ┆ ┆ ┆ l,null,nu ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ ll,null,n ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ ┆ ull… ┆ ┆ ┆ ┆ ┆ │\n", + "└───────────┴───────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴──────────┘" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import polars as pl\n", + "metadata_df = pl.read_parquet(ROOT_DIR + \"/triplet_tensors/metadata/codes.parquet\")\n", + "metadata_df" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "colab": { + "background_save": true + }, + "id": "79OAOTQXYkCt", + "outputId": "ce364b33-49e3-4e66-f770-f4211ff26eb6" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-12-15 03:32:01.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Enforcing tags! \u001b[0m\n", + "\u001b[32m2024-12-15 03:32:01.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Printing config tree with Rich! 
\u001b[0m\n", + "\u001b[2mCONFIG\u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mdata\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmeds_torch.data.datamodule.MEDSDataModule.initialize \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdataset_cls\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmeds_torch.data.components.pytorch_dataset.PytorchDataset \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdata_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mmeds_cohort_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mcache_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mcode_metadata_fp\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimici\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mschema_files_root\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimic\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtasks_root\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40msplit_names\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m 
\u001b[0m\u001b[2;91;40mtrain\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrain \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mvalidate\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtuning \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mtest\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mheld_out \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mpredict_dataset\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mval \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtask_root_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_d\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtask_name\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmortality/in_icu/first_24h \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtask_label_path\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtask_info_path\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtrain_subset_size\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mnull \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtrain_subset_seed\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m1 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mcollate_type\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtriplet \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m 
\u001b[0m\u001b[2;91;40mtokenizer\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40memilyalsentzer/Bio_ClinicalBERT \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdo_include_subject_id\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdo_include_subsequence_indices\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdo_include_start_time_min\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdo_include_end_time\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdo_include_prediction_time\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdo_prepend_static_data\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mnum_value_code_quantiles\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mnull \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mmax_seq_len\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mmin_seq_len\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m1 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40msubsequence_sampling_strategy\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mto_end \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mseq_padding_side\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mright 
\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtext_max_seq_len\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m8 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdo_flatten_tensors\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdataloader\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mbatch_size\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m4 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mnum_workers\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m6 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mpin_memory\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mvocab_size\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m5578 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40meos_offset\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m1 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mEOS_TOKEN_ID\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m5577 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mpostpend_eos_token\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mmodel\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;91;40minput_encoder\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m 
\u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmeds_torch.input_encoder.triplet_encoder.TripletEncoder.init\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mcollate_style\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtriplet \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mmax_seq_len\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mtoken_dim\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mvocab_size\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m5578 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mbackbone\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mnheads\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m4 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mn_layers\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m2 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mdropout\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m0 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mtoken_dim\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mmax_seq_len\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m 
\u001b[0m\u001b[2;91;40mvocab_size\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m5578 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mget_last_token\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40muse_cls_token\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mpostpend_eos_token\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mmodel_type\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mnull \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmeds_torch.models.components.transformer_encoder.Transformer\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mmodel\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mx_transformers.TransformerWrapper \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mlogits_dim\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mnum_tokens\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m5578 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mmax_seq_len\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 
\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40memb_dropout\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m0 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40muse_abs_pos_emb\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mattn_layers\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mx_transformers.Encoder \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mdim\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mdepth\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m2 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mheads\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m4 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mlayer_dropout\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m0 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mattn_dropout\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m0 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mff_dropout\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m0 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mrotary_pos_emb\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ 
\u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40muse_cls_token\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mtoken_emb\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtorch.nn.Identity \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40moptimizer\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtorch.optim.Adam \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40m_partial_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mlr\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m0.001 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mweight_decay\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m0.0 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmeds_torch.models.supervised_model.SupervisedModule.initialize\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mmax_seq_len\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtoken_dim\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m 
\u001b[0m\u001b[2;91;40mvocab_size\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m5578 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mget_representations\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtask_name\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmortality/in_icu/first_24h \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mbatch_size\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m4 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mcompile\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mcallbacks\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;91;40mmodel_checkpoint\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mlightning.pytorch.callbacks.ModelCheckpoint \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mdirpath\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mfilename\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mepoch_{epoch:03d} \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mmonitor\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mval/loss \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m 
\u001b[0m\u001b[2;91;40mverbose\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40msave_last\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40msave_top_k\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m1 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mmode\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmin \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mauto_insert_metric_name\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40msave_weights_only\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mevery_n_train_steps\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mnull \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mtrain_time_interval\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mnull \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mevery_n_epochs\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mnull \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40msave_on_train_epoch_end\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mnull \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mearly_stopping\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m 
\u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mlightning.pytorch.callbacks.EarlyStopping \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mmonitor\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mval/loss \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mmin_delta\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m0.0 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mpatience\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m3 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mverbose\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mmode\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmin \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mstrict\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mcheck_finite\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mstopping_threshold\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mnull \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mdivergence_threshold\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mnull \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mcheck_on_train_epoch_end\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mnull \u001b[0m\n", + 
"\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mmodel_summary\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mlightning.pytorch.callbacks.RichModelSummary \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mmax_depth\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m-1 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mlogger\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;91;40mwandb\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mlightning.pytorch.loggers.wandb.WandbLogger \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40msave_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40moffline\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mid\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mnull \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40manonymous\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mnull \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mproject\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mlightning-hydra-template \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m 
\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mlog_model\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mprefix\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mgroup\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmimiciv_tokenization \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mtags\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m-\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmimiciv \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m-\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtriplet \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m-\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtransformer_encoder \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mjob_type\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mtrainer\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mlightning.pytorch.trainer.Trainer \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdefault_root_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimici\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m 
\u001b[0m\u001b[2;91;40mmin_epochs\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m1 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mmax_epochs\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m10 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40maccelerator\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mgpu \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdevices\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m1 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mcheck_val_every_n_epoch\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m1 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdeterministic\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mgradient_clip_val\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m1.0 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mpaths\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;91;40mdata_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mmeds_cohort_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40moutput_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtime_output_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m 
\u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mlog_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//m\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mwork_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mextras\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;91;40mignore_warnings\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40menforce_tags\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mprint_config\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mname\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;40mtrain \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mtest_devices\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;40m1 \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mbest_config_path\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;40mNone \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mtask_name\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;40mNone \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mtags\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;40m[\u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;93;40mmimiciv\u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;40m,\u001b[0m\u001b[2;97;40m 
\u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;93;40mtriplet\u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;40m,\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;93;40mtransformer_encoder\u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;40m] \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mtrain\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;40mTrue \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mtest\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;40mTrue \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mckpt_path\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;40mNone \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mseed\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;40m0 \u001b[0m\n", + "\u001b[2m└── \u001b[0m\u001b[2mray_memory_monitor_refresh_ms\u001b[0m\n", + "\u001b[2m \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;40m0 \u001b[0m\n", + "\u001b[32m2024-12-15 03:32:01.530\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Instantiating datamodule \u001b[0m\n", + "\u001b[32m2024-12-15 03:32:01.543\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Instantiating model \u001b[0m\n", + "\u001b[32m2024-12-15 03:32:01.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Instantiating callbacks...\u001b[0m\n", + "\u001b[32m2024-12-15 03:32:01.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Instantiating callback \u001b[0m\n", + "\u001b[32m2024-12-15 03:32:01.633\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] 
Instantiating callback \u001b[0m\n", + "\u001b[32m2024-12-15 03:32:01.634\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Instantiating callback \u001b[0m\n", + "\u001b[32m2024-12-15 03:32:01.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Instantiating loggers...\u001b[0m\n", + "\u001b[32m2024-12-15 03:32:01.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Instantiating logger \u001b[0m\n", + "\u001b[32m2024-12-15 03:32:01.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Instantiating trainer \u001b[0m\n", + "Trainer already configured with model summary callbacks: []. Skipping setting a default `ModelSummary` callback.\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "\u001b[32m2024-12-15 03:32:01.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Logging hyperparameters!\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mnoufattole\u001b[0m. 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.19.1\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds_torch/train_output/2024-12-15_03-32-01_385164/wandb/run-20241215_033202-9uotaoyd\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mwobbly-gorge-4323\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/noufattole/lightning-hydra-template\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/noufattole/lightning-hydra-template/runs/9uotaoyd\u001b[0m\n", + "\u001b[32m2024-12-15 03:32:03.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Starting training!\u001b[0m\n", + "\u001b[32m2024-12-15 03:32:03.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m512\u001b[0m - \u001b[1mReading subject schema and static data\u001b[0m\n", + "\u001b[32m2024-12-15 03:32:03.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m588\u001b[0m - \u001b[1mIf the task file is not found at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h.parquet\u001b[0m\n", + "\u001b[32m2024-12-15 03:32:03.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m590\u001b[0m - \u001b[1mSearching for task parquets over the glob 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h/**/*.parquet\u001b[0m\n", + "\u001b[32m2024-12-15 03:32:03.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m592\u001b[0m - \u001b[1mReading task constraints for mortality/in_icu/first_24h from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h/**/*.parquet\u001b[0m\n", + "\u001b[32m2024-12-15 03:32:03.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m512\u001b[0m - \u001b[1mReading subject schema and static data\u001b[0m\n", + "\u001b[32m2024-12-15 03:32:03.921\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m588\u001b[0m - \u001b[1mIf the task file is not found at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h.parquet\u001b[0m\n", + "\u001b[32m2024-12-15 03:32:03.922\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m590\u001b[0m - \u001b[1mSearching for task parquets over the glob /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h/**/*.parquet\u001b[0m\n", + "\u001b[32m2024-12-15 03:32:03.922\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m592\u001b[0m - \u001b[1mReading task constraints for mortality/in_icu/first_24h from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h/**/*.parquet\u001b[0m\n", + "\u001b[32m2024-12-15 03:32:03.938\u001b[0m 
| \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m512\u001b[0m - \u001b[1mReading subject schema and static data\u001b[0m\n", + "\u001b[32m2024-12-15 03:32:03.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m588\u001b[0m - \u001b[1mIf the task file is not found at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h.parquet\u001b[0m\n", + "\u001b[32m2024-12-15 03:32:03.942\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m590\u001b[0m - \u001b[1mSearching for task parquets over the glob /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h/**/*.parquet\u001b[0m\n", + "\u001b[32m2024-12-15 03:32:03.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m592\u001b[0m - \u001b[1mReading task constraints for mortality/in_icu/first_24h from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h/**/*.parquet\u001b[0m\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━┳━━━━━┳━━━━┓\n", + "┃\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mName \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mTy…\u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mPa…\u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mM…\u001b[0m\u001b[1;35m \u001b[0m┃\n", + "┡━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━╇━━━━━╇━━━━┩\n", + "│\u001b[2m \u001b[0m\u001b[2m0 
\u001b[0m\u001b[2m \u001b[0m│ model │ Tr… │ 542 │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m1 \u001b[0m\u001b[2m \u001b[0m│ model.model │ Tr… │ 542 │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m2 \u001b[0m\u001b[2m \u001b[0m│ model.model.token_emb │ Id… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m3 \u001b[0m\u001b[2m \u001b[0m│ model.model.post_emb_norm │ Id… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m4 \u001b[0m\u001b[2m \u001b[0m│ model.model.emb_dropout │ Dr… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m5 \u001b[0m\u001b[2m \u001b[0m│ model.model.project_emb │ Id… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m6 \u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers │ En… │ 526 │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m7 \u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers │ Mo… │ 526 │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m8 \u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.0 │ Mo… │ 131 │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m9 \u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.0.0 │ Mo… │ 128 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m10\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.0.0.0 │ La… │ 128 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m11\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.0.0.0.ln │ La… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m12\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.0.1 │ At… │ 131 │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m13\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.0.1.to_q │ Li… │ 32… │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m14\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.0.1.to_k │ Li… │ 32… │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ 
│\n", + "│\u001b[2m \u001b[0m\u001b[2m15\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.0.1.to_v │ Li… │ 32… │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m16\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.0.1.attend │ At… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m17\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.0.1.attend.attn_dropout │ Dr… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m18\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.0.1.to_out │ Li… │ 32… │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m19\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.0.2 │ Re… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m20\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.1 │ Mo… │ 131 │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m21\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.1.0 │ Mo… │ 128 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m22\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.1.0.0 │ La… │ 128 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m23\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.1.0.0.ln │ La… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m24\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.1.1 │ Fe… │ 131 │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m25\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.1.1.ff │ Se… │ 131 │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m26\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.1.1.ff.0 │ Se… │ 66… │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m27\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.1.1.ff.0.0 │ Li… │ 66… │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m28\u001b[0m\u001b[2m \u001b[0m│ 
model.model.attn_layers.layers.1.1.ff.0.1 │ GE… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m29\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.1.1.ff.1 │ Dr… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m30\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.1.1.ff.2 │ Li… │ 65… │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m31\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.1.2 │ Re… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m32\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.2 │ Mo… │ 131 │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m33\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.2.0 │ Mo… │ 128 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m34\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.2.0.0 │ La… │ 128 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m35\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.2.0.0.ln │ La… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m36\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.2.1 │ At… │ 131 │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m37\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.2.1.to_q │ Li… │ 32… │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m38\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.2.1.to_k │ Li… │ 32… │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m39\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.2.1.to_v │ Li… │ 32… │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m40\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.2.1.attend │ At… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m41\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.2.1.attend.attn_dropout │ Dr… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m42\u001b[0m\u001b[2m \u001b[0m│ 
model.model.attn_layers.layers.2.1.to_out │ Li… │ 32… │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m43\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.2.2 │ Re… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m44\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.3 │ Mo… │ 131 │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m45\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.3.0 │ Mo… │ 128 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m46\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.3.0.0 │ La… │ 128 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m47\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.3.0.0.ln │ La… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m48\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.3.1 │ Fe… │ 131 │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m49\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.3.1.ff │ Se… │ 131 │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m50\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.3.1.ff.0 │ Se… │ 66… │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m51\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.3.1.ff.0.0 │ Li… │ 66… │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m52\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.3.1.ff.0.1 │ GE… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m53\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.3.1.ff.1 │ Dr… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m54\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.3.1.ff.2 │ Li… │ 65… │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m55\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.layers.3.2 │ Re… │ 0 │ t… │\n", + "│\u001b[2m 
\u001b[0m\u001b[2m56\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.rotary_pos_emb │ Ro… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m57\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.adaptive_mlp │ Id… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m58\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.final_norm │ La… │ 128 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m59\u001b[0m\u001b[2m \u001b[0m│ model.model.attn_layers.final_norm.ln │ La… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m60\u001b[0m\u001b[2m \u001b[0m│ model.model.to_logits │ Li… │ 16… │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m61\u001b[0m\u001b[2m \u001b[0m│ input_encoder │ Tr… │ 714 │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m62\u001b[0m\u001b[2m \u001b[0m│ input_encoder.date_embedder │ CVE │ 256 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m63\u001b[0m\u001b[2m \u001b[0m│ input_encoder.date_embedder.layer │ Li… │ 256 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m64\u001b[0m\u001b[2m \u001b[0m│ input_encoder.code_embedder │ Em… │ 713 │ t… │\n", + "│\u001b[2m \u001b[0m│ │ │ K │ │\n", + "│\u001b[2m \u001b[0m\u001b[2m65\u001b[0m\u001b[2m \u001b[0m│ input_encoder.numeric_value_embedder │ CVE │ 256 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m66\u001b[0m\u001b[2m \u001b[0m│ input_encoder.numeric_value_embedder.layer │ Li… │ 256 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m67\u001b[0m\u001b[2m \u001b[0m│ projection │ Li… │ 129 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m68\u001b[0m\u001b[2m \u001b[0m│ train_acc │ Bi… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m69\u001b[0m\u001b[2m \u001b[0m│ train_auc │ Bi… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m70\u001b[0m\u001b[2m \u001b[0m│ train_apr │ Bi… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m71\u001b[0m\u001b[2m \u001b[0m│ val_acc │ Bi… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m72\u001b[0m\u001b[2m \u001b[0m│ val_auc │ Bi… │ 0 │ t… │\n", + "│\u001b[2m 
\u001b[0m\u001b[2m73\u001b[0m\u001b[2m \u001b[0m│ val_apr │ Bi… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m74\u001b[0m\u001b[2m \u001b[0m│ test_acc │ Bi… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m75\u001b[0m\u001b[2m \u001b[0m│ test_auc │ Bi… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m76\u001b[0m\u001b[2m \u001b[0m│ test_apr │ Bi… │ 0 │ t… │\n", + "│\u001b[2m \u001b[0m\u001b[2m77\u001b[0m\u001b[2m \u001b[0m│ criterion │ BC… │ 0 │ t… │\n", + "└────┴────────────────────────────────────────────────────────┴─────┴─────┴────┘\n", + "\u001b[1mTrainable params\u001b[0m: 1.3 M \n", + "\u001b[1mNon-trainable params\u001b[0m: 0 \n", + "\u001b[1mTotal params\u001b[0m: 1.3 M \n", + "\u001b[1mTotal estimated model params size (MB)\u001b[0m: 5 \n", + "\u001b[1mModules in train mode\u001b[0m: 78 \n", + "\u001b[1mModules in eval mode\u001b[0m: 0 \n", + "Sanity Checking: | | 0/? [00:00\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:55.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Printing config tree with Rich! \u001b[0m\n", + "\u001b[32m2024-12-15 03:33:55.173\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36mwarning\u001b[0m:\u001b[36m93\u001b[0m - \u001b[33m\u001b[1m[rank0] Field 'callbacks' not found in config. 
Skipping 'callbacks' config printing...\u001b[0m\n", + "\u001b[2mCONFIG\u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mdata\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmeds_torch.data.datamodule.MEDSDataModule.initialize \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdataset_cls\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmeds_torch.data.components.pytorch_dataset.PytorchDataset \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdata_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mmeds_cohort_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mcache_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mcode_metadata_fp\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimici\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mschema_files_root\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimic\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtasks_root\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40msplit_names\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m 
\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mtrain\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrain \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mvalidate\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtuning \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mtest\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mheld_out \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mpredict_dataset\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mval \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtask_root_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_d\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtask_name\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmortality/in_icu/first_24h \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtask_label_path\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtask_info_path\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtrain_subset_size\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mnull \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtrain_subset_seed\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m1 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mcollate_type\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtriplet \u001b[0m\n", + "\u001b[2m│ 
\u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtokenizer\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40memilyalsentzer/Bio_ClinicalBERT \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdo_include_subject_id\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdo_include_subsequence_indices\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdo_include_start_time_min\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdo_include_end_time\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdo_include_prediction_time\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdo_prepend_static_data\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mnum_value_code_quantiles\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mnull \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mmax_seq_len\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mmin_seq_len\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m1 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40msubsequence_sampling_strategy\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mto_end \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mseq_padding_side\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m 
\u001b[0m\u001b[2;40mright \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtext_max_seq_len\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m8 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdo_flatten_tensors\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdataloader\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mbatch_size\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m64 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mnum_workers\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m6 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mpin_memory\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mvocab_size\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m5578 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40meos_offset\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m1 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mEOS_TOKEN_ID\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m5577 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mpostpend_eos_token\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mmodel\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;91;40minput_encoder\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m 
\u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmeds_torch.input_encoder.triplet_encoder.TripletEncoder.init\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mcollate_style\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtriplet \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mmax_seq_len\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mtoken_dim\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mvocab_size\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m5578 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mbackbone\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mnheads\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m4 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mn_layers\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m2 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mdropout\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m0 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mtoken_dim\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mmax_seq_len\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m 
\u001b[0m\u001b[2;91;40mvocab_size\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m5578 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mget_last_token\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40muse_cls_token\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mpostpend_eos_token\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mmodel_type\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mnull \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmeds_torch.models.components.transformer_encoder.Transformer\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mmodel\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mx_transformers.TransformerWrapper \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mlogits_dim\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mnum_tokens\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m5578 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mmax_seq_len\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 
\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40memb_dropout\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m0 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40muse_abs_pos_emb\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mattn_layers\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mx_transformers.Encoder \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mdim\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mdepth\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m2 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mheads\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m4 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mlayer_dropout\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m0 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mattn_dropout\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m0 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mff_dropout\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m0 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mrotary_pos_emb\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ 
\u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40muse_cls_token\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mtoken_emb\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtorch.nn.Identity \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40moptimizer\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtorch.optim.Adam \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40m_partial_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mlr\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m0.001 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mweight_decay\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m0.0 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmeds_torch.models.supervised_model.SupervisedModule.initialize\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mmax_seq_len\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtoken_dim\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m128 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m 
\u001b[0m\u001b[2;91;40mvocab_size\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m5578 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mget_representations\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtask_name\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmortality/in_icu/first_24h \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mbatch_size\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m64 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mcompile\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mlogger\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;91;40mwandb\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mlightning.pytorch.loggers.wandb.WandbLogger \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40msave_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40moffline\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mid\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mnull \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40manonymous\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m 
\u001b[0m\u001b[2;40mnull \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mproject\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mlightning-hydra-template \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mlog_model\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mprefix\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mgroup\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmimiciv_tokenization \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mtags\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m-\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mmimiciv \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m-\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtriplet \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m-\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtransformer_encoder \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;91;40mjob_type\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mtrainer\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;91;40m_target_\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mlightning.pytorch.trainer.Trainer \u001b[0m\n", + "\u001b[2m│ 
\u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdefault_root_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimici\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mmin_epochs\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m1 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mmax_epochs\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m10 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40maccelerator\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mgpu \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdevices\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m1 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mcheck_val_every_n_epoch\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m1 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdeterministic\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mgradient_clip_val\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m1.0 \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mpaths\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;91;40mpredict_fp\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mdata_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mmeds_cohort_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m 
\u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40moutput_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mtime_output_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mlog_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//m\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mwork_dir\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mextras\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;91;40mignore_warnings\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mfalse \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40menforce_tags\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;91;40mprint_config\u001b[0m\u001b[2;97;40m:\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;40mtrue \u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m \u001b[0m\u001b[2;40m \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mname\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;40mpredict \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mtask_name\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;40mNone \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mtags\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── 
\u001b[0m\u001b[2;40m[\u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;93;40mmimiciv\u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;40m,\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;93;40mtriplet\u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;40m,\u001b[0m\u001b[2;97;40m \u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;93;40mtransformer_encoder\u001b[0m\u001b[2;93;40m'\u001b[0m\u001b[2;40m] \u001b[0m\n", + "\u001b[2m├── \u001b[0m\u001b[2mckpt_path\u001b[0m\n", + "\u001b[2m│ \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;40m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds_torch\u001b[0m\n", + "\u001b[2m└── \u001b[0m\u001b[2mseed\u001b[0m\n", + "\u001b[2m \u001b[0m\u001b[2m└── \u001b[0m\u001b[2;40m0 \u001b[0m\n", + "\u001b[32m2024-12-15 03:33:55.262\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.predict\u001b[0m:\u001b[36mpredict\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mSet all seeds to 0\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:55.262\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Instantiating datamodule \u001b[0m\n", + "\u001b[32m2024-12-15 03:33:55.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Instantiating model \u001b[0m\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/site-packages/meds_torch/predict.py:332: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. 
Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n", + " model.load_state_dict(torch.load(cfg.ckpt_path)[\"state_dict\"])\n", + "\u001b[32m2024-12-15 03:33:56.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Instantiating loggers...\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:56.161\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Instantiating logger \u001b[0m\n", + "\u001b[32m2024-12-15 03:33:56.163\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Instantiating trainer \u001b[0m\n", + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "\u001b[32m2024-12-15 03:33:56.178\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Logging hyperparameters!\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mnoufattole\u001b[0m. 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.19.1\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds_torch/predict_output/2024-12-15_03-33-55_136346/wandb/run-20241215_033356-svs5y5oi\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mconfused-puddle-4324\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/noufattole/lightning-hydra-template\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/noufattole/lightning-hydra-template/runs/svs5y5oi\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:57.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Starting Generating Predictions!\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:57.570\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m512\u001b[0m - \u001b[1mReading subject schema and static data\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:57.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m588\u001b[0m - \u001b[1mIf the task file is not found at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h.parquet\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:57.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m590\u001b[0m - \u001b[1mSearching for task parquets over the glob 
/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h/**/*.parquet\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:57.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m592\u001b[0m - \u001b[1mReading task constraints for mortality/in_icu/first_24h from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h/**/*.parquet\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:57.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m512\u001b[0m - \u001b[1mReading subject schema and static data\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:57.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m588\u001b[0m - \u001b[1mIf the task file is not found at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h.parquet\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:57.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m590\u001b[0m - \u001b[1mSearching for task parquets over the glob /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h/**/*.parquet\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:57.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m592\u001b[0m - \u001b[1mReading task constraints for mortality/in_icu/first_24h from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h/**/*.parquet\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:57.644\u001b[0m 
| \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m512\u001b[0m - \u001b[1mReading subject schema and static data\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:57.648\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m588\u001b[0m - \u001b[1mIf the task file is not found at /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h.parquet\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:57.648\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m590\u001b[0m - \u001b[1mSearching for task parquets over the glob /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h/**/*.parquet\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:57.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.data.components.pytorch_dataset\u001b[0m:\u001b[36mread_subject_descriptors\u001b[0m:\u001b[36m592\u001b[0m - \u001b[1mReading task constraints for mortality/in_icu/first_24h from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds/task_labels/mortality/in_icu/first_24h/**/*.parquet\u001b[0m\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n", + "/storage/nassim/miniconda3/envs/meds_demo/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: Using fork() can cause Polars to deadlock in the child process.\n", + "In addition, using fork() with Python in general is a recipe for mysterious\n", + "deadlocks and crashes.\n", + "\n", + "The most likely reason you are seeing this error is because you are using the\n", + "multiprocessing module on Linux, which uses fork() by default. This will be\n", + "fixed in Python 3.14. 
Until then, you want to use the \"spawn\" context instead.\n", + "\n", + "See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.\n", + "\n", + "If you really know what your doing, you can silence this warning with the warning module\n", + "or by setting POLARS_ALLOW_FORKING_THREAD=1.\n", + "\n", + " self.pid = os.fork()\n", + "Predicting DataLoader 0: 100%|████████████████████| 1/1 [00:00<00:00, 2.11it/s]\n", + "\u001b[32m2024-12-15 03:33:58.499\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Output dir: /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/meds_torch/predict_output/2024-12-15_03-33-55_136346\u001b[0m\n", + "\u001b[32m2024-12-15 03:33:58.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmeds_torch.utils.pylogger\u001b[0m:\u001b[36minfo\u001b[0m:\u001b[36m88\u001b[0m - \u001b[1m[rank0] Closing wandb!\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mconfused-puddle-4324\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/noufattole/lightning-hydra-template/runs/svs5y5oi\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at: \u001b[34m\u001b[4mhttps://wandb.ai/noufattole/lightning-hydra-template\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./work_dir/mimiciv_demo/meds_torch/predict_output/2024-12-15_03-33-55_136346/wandb/run-20241215_033356-svs5y5oi/logs\u001b[0m\n" + ] + } + ], + "source": [ + "PREDICT_OUTPUT_DIR = ROOT_DIR + \"/meds_torch/predict_output/\"\n", + "predict_cmd = f\"\"\"\n", + "meds-torch-predict \\\n", + " experiment=triplet_mtr \\\n", + " ckpt_path={TRAIN_CKPT_PATH} \\\n", + " paths.data_dir={TENSOR_DIR} \\\n", + " paths.meds_cohort_dir={MEDS_DIR} \\\n", + " 
paths.output_dir={PREDICT_OUTPUT_DIR} \\\n", + " data.task_name={TASK_NAME} \\\n", + " data.task_root_dir={TASK_DIR} \\\n", + " data.do_include_subject_id=True \\\n", + " data.do_include_prediction_time=True \\\n", + " hydra.searchpath=[pkg://meds_torch.configs,{CONFIGS_DIR}/meds-torch-configs]\n", + "\"\"\"\n", + "! $predict_cmd" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "id": "p_D07KzxjVUl" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//meds_torch/predict_output/2024-12-15_03-33-55_136346//predict.parquet\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "shape: (22, 7)
subject_idprediction_timeboolean_valuepredicted_boolean_valuepredicted_boolean_probabilityembeddingslogits
i64datetime[ns]boolboolf64list[f64]list[f64]
100024952141-05-23 20:18:01falsefalse0.136801[-2.545923, 0.025286, … 0.705734][-1.842114]
100044222111-01-18 09:44:50falsefalse0.124603[-2.531284, 0.059085, … 0.699216][-1.949547]
100047202186-11-13 19:55:00truefalse0.113988[-2.485285, 0.113076, … 0.744316][-2.050634]
100047332174-12-05 11:28:24falsefalse0.109338[-2.510099, 0.121363, … 0.794356][-2.097525]
100079282129-04-07 00:25:00falsefalse0.116089[-2.485562, 0.094246, … 0.770296][-2.03]
100274452142-08-01 01:41:00falsefalse0.123286[-2.5613, 0.075122, … 0.75831][-1.96167]
100276022201-10-31 12:25:00falsefalse0.12781[-2.488924, 0.077882, … 0.719922][-1.92046]
100356312116-02-29 18:43:20falsefalse0.105775[-2.481711, 0.131169, … 0.778387][-2.134639]
100379752185-01-18 19:12:12truefalse0.107896[-2.522013, 0.146351, … 0.817377][-2.112419]
100397082140-01-24 18:08:00falsefalse0.121061[-2.535217, 0.070299, … 0.750187][-1.982419]
" + ], + "text/plain": [ + "shape: (22, 7)\n", + "┌────────────┬──────────────┬──────────────┬─────────────┬─────────────┬─────────────┬─────────────┐\n", + "│ subject_id ┆ prediction_t ┆ boolean_valu ┆ predicted_b ┆ predicted_b ┆ embeddings ┆ logits │\n", + "│ --- ┆ ime ┆ e ┆ oolean_valu ┆ oolean_prob ┆ --- ┆ --- │\n", + "│ i64 ┆ --- ┆ --- ┆ e ┆ ability ┆ list[f64] ┆ list[f64] │\n", + "│ ┆ datetime[ns] ┆ bool ┆ --- ┆ --- ┆ ┆ │\n", + "│ ┆ ┆ ┆ bool ┆ f64 ┆ ┆ │\n", + "╞════════════╪══════════════╪══════════════╪═════════════╪═════════════╪═════════════╪═════════════╡\n", + "│ 10002495 ┆ 2141-05-23 ┆ false ┆ false ┆ 0.136801 ┆ [-2.545923, ┆ [-1.842114] │\n", + "│ ┆ 20:18:01 ┆ ┆ ┆ ┆ 0.025286, … ┆ │\n", + "│ ┆ ┆ ┆ ┆ ┆ 0.7057… ┆ │\n", + "│ 10004422 ┆ 2111-01-18 ┆ false ┆ false ┆ 0.124603 ┆ [-2.531284, ┆ [-1.949547] │\n", + "│ ┆ 09:44:50 ┆ ┆ ┆ ┆ 0.059085, … ┆ │\n", + "│ ┆ ┆ ┆ ┆ ┆ 0.6992… ┆ │\n", + "│ 10004720 ┆ 2186-11-13 ┆ true ┆ false ┆ 0.113988 ┆ [-2.485285, ┆ [-2.050634] │\n", + "│ ┆ 19:55:00 ┆ ┆ ┆ ┆ 0.113076, … ┆ │\n", + "│ ┆ ┆ ┆ ┆ ┆ 0.7443… ┆ │\n", + "│ 10004733 ┆ 2174-12-05 ┆ false ┆ false ┆ 0.109338 ┆ [-2.510099, ┆ [-2.097525] │\n", + "│ ┆ 11:28:24 ┆ ┆ ┆ ┆ 0.121363, … ┆ │\n", + "│ ┆ ┆ ┆ ┆ ┆ 0.7943… ┆ │\n", + "│ 10007928 ┆ 2129-04-07 ┆ false ┆ false ┆ 0.116089 ┆ [-2.485562, ┆ [-2.03] │\n", + "│ ┆ 00:25:00 ┆ ┆ ┆ ┆ 0.094246, … ┆ │\n", + "│ ┆ ┆ ┆ ┆ ┆ 0.7702… ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 10027445 ┆ 2142-08-01 ┆ false ┆ false ┆ 0.123286 ┆ [-2.5613, ┆ [-1.96167] │\n", + "│ ┆ 01:41:00 ┆ ┆ ┆ ┆ 0.075122, … ┆ │\n", + "│ ┆ ┆ ┆ ┆ ┆ 0.75831] ┆ │\n", + "│ 10027602 ┆ 2201-10-31 ┆ false ┆ false ┆ 0.12781 ┆ [-2.488924, ┆ [-1.92046] │\n", + "│ ┆ 12:25:00 ┆ ┆ ┆ ┆ 0.077882, … ┆ │\n", + "│ ┆ ┆ ┆ ┆ ┆ 0.7199… ┆ │\n", + "│ 10035631 ┆ 2116-02-29 ┆ false ┆ false ┆ 0.105775 ┆ [-2.481711, ┆ [-2.134639] │\n", + "│ ┆ 18:43:20 ┆ ┆ ┆ ┆ 0.131169, … ┆ │\n", + "│ ┆ ┆ ┆ ┆ ┆ 0.7783… ┆ │\n", + "│ 10037975 ┆ 2185-01-18 ┆ true ┆ false ┆ 0.107896 ┆ [-2.522013, ┆ [-2.112419] │\n", + "│ 
┆ 19:12:12 ┆ ┆ ┆ ┆ 0.146351, … ┆ │\n", + "│ ┆ ┆ ┆ ┆ ┆ 0.8173… ┆ │\n", + "│ 10039708 ┆ 2140-01-24 ┆ false ┆ false ┆ 0.121061 ┆ [-2.535217, ┆ [-1.982419] │\n", + "│ ┆ 18:08:00 ┆ ┆ ┆ ┆ 0.070299, … ┆ │\n", + "│ ┆ ┆ ┆ ┆ ┆ 0.7501… ┆ │\n", + "└────────────┴──────────────┴──────────────┴─────────────┴─────────────┴─────────────┴─────────────┘" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "PREDICT_OUTPUT_FP =!echo $(meds-torch-latest-dir path=$PREDICT_OUTPUT_DIR)/\n", + "PREDICT_OUTPUT_FP = PREDICT_OUTPUT_FP[0] + \"/predict.parquet\"\n", + "print(PREDICT_OUTPUT_FP)\n", + "predictions_df = pl.read_parquet(PREDICT_OUTPUT_FP)\n", + "predictions_df" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "np.float64(0.27499999999999997)" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import roc_auc_score\n", + "roc_auc_score(predictions_df['boolean_value'], predictions_df['predicted_boolean_probability'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "cbdBGJcNmWR7", + "outputId": "74ba78e4-cf98-4843-e7a5-ec1742d111b2" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting git+https://github.com/kamilest/meds-evaluation.git\n", + " Cloning https://github.com/kamilest/meds-evaluation.git to /tmp/pip-req-build-7m5po6_6\n", + " Running command git clone --filter=blob:none --quiet https://github.com/kamilest/meds-evaluation.git /tmp/pip-req-build-7m5po6_6\n", + " Resolved https://github.com/kamilest/meds-evaluation.git to commit 472e15298e385ae148a48dcb3f3c31a67398ef94\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... 
\u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting polars==1.5.0 (from meds-evaluation==0.1.dev76+g472e152)\n", + " Downloading polars-1.5.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (14 kB)\n", + "Requirement already satisfied: pyarrow in /usr/local/lib/python3.10/dist-packages (from meds-evaluation==0.1.dev76+g472e152) (17.0.0)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from meds-evaluation==0.1.dev76+g472e152) (1.26.4)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from meds-evaluation==0.1.dev76+g472e152) (1.5.2)\n", + "Collecting hydra-core (from meds-evaluation==0.1.dev76+g472e152)\n", + " Downloading hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB)\n", + "Collecting loguru (from meds-evaluation==0.1.dev76+g472e152)\n", + " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", + "Collecting omegaconf<2.4,>=2.2 (from hydra-core->meds-evaluation==0.1.dev76+g472e152)\n", + " Downloading omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB)\n", + "Collecting antlr4-python3-runtime==4.9.* (from hydra-core->meds-evaluation==0.1.dev76+g472e152)\n", + " Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from hydra-core->meds-evaluation==0.1.dev76+g472e152) (24.2)\n", + "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->meds-evaluation==0.1.dev76+g472e152) (1.13.1)\n", + "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->meds-evaluation==0.1.dev76+g472e152) (1.4.2)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->meds-evaluation==0.1.dev76+g472e152) (3.5.0)\n", + "Requirement already satisfied: PyYAML>=5.1.0 in /usr/local/lib/python3.10/dist-packages (from omegaconf<2.4,>=2.2->hydra-core->meds-evaluation==0.1.dev76+g472e152) (6.0.2)\n", + "Downloading polars-1.5.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (31.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m31.6/31.6 MB\u001b[0m \u001b[31m45.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading hydra_core-1.3.2-py3-none-any.whl (154 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.5/154.5 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: meds-evaluation, antlr4-python3-runtime\n", + " Building wheel for meds-evaluation (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for meds-evaluation: filename=meds_evaluation-0.1.dev76+g472e152-py3-none-any.whl size=8159 sha256=6b616229fda46dba9c2b0f4a9a2cc5f2b9c61449b5e7b8ab9c5fb74f5d99640d\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-5etx7mzx/wheels/16/c4/cd/448d9248b6d2599d9dbced9e8a095cd4a2a328ca08547f5977\n", + " Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144555 sha256=1c21aaab011040f748cc9e6204177bb6a58f41165ab18ff91325191924af5ea3\n", + " Stored in directory: /root/.cache/pip/wheels/12/93/dd/1f6a127edc45659556564c5730f6d4e300888f4bca2d4c5a88\n", + "Successfully built meds-evaluation antlr4-python3-runtime\n", + "Installing collected packages: antlr4-python3-runtime, polars, omegaconf, loguru, hydra-core, meds-evaluation\n", + " Attempting uninstall: polars\n", + " Found existing installation: polars 1.9.0\n", + " Uninstalling polars-1.9.0:\n", + " Successfully uninstalled polars-1.9.0\n", + "Successfully installed antlr4-python3-runtime-4.9.3 hydra-core-1.3.2 loguru-0.7.3 meds-evaluation-0.1.dev76+g472e152 omegaconf-2.3.0 polars-1.5.0\n" + ] + }, + { + "data": { + "application/vnd.colab-display-data+json": { + "id": "2f4be86227d1412090215e11c35cb1b7", + "pip_warning": { + "packages": [ + "pydevd_plugins" + ] + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# !pip install git+https://github.com/kamilest/meds-evaluation.git" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "meds_demo", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": 
"ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From f319e7023af1c5a2c3a1ec8e56c0b5c7e31b5a5c Mon Sep 17 00:00:00 2001 From: Nassim Date: Sun, 15 Dec 2024 04:14:08 -0500 Subject: [PATCH 8/8] updated cehrbert demo, runs e2e --- demo/meds_cehrbert.ipynb | 1436 ++++++++++++++++++++++++++++++++++---- 1 file changed, 1296 insertions(+), 140 deletions(-) diff --git a/demo/meds_cehrbert.ipynb b/demo/meds_cehrbert.ipynb index ea1e302..f5c2689 100644 --- a/demo/meds_cehrbert.ipynb +++ b/demo/meds_cehrbert.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 11, "id": "e4c2dca6", "metadata": {}, "outputs": [ @@ -21,103 +21,500 @@ "output_type": "stream", "text": [ "Collecting meds_reader==0.1.9\n", - " Downloading meds_reader-0.1.9-cp312-cp312-macosx_12_0_x86_64.whl.metadata (3.0 kB)\n", - "Requirement already satisfied: pyarrow>=9 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds_reader==0.1.9) (17.0.0)\n", - "Requirement already satisfied: meds==0.3.3 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds_reader==0.1.9) (0.3.3)\n", + " Downloading meds_reader-0.1.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)\n", + "Collecting pyarrow>=9 (from meds_reader==0.1.9)\n", + " Downloading pyarrow-18.1.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.3 kB)\n", + "Collecting meds==0.3.3 (from meds_reader==0.1.9)\n", + " Using cached meds-0.3.3-py3-none-any.whl.metadata (10 kB)\n", "Collecting numpy<2,>=1.16 (from meds_reader==0.1.9)\n", - " Using cached numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl.metadata (61 kB)\n", - "Requirement already satisfied: pandas>=2.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds_reader==0.1.9) (2.2.3)\n", - "Requirement already satisfied: jsonschema>=4.0.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds==0.3.3->meds_reader==0.1.9) 
(4.23.0)\n", - "Requirement already satisfied: typing-extensions>=4.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from meds==0.3.3->meds_reader==0.1.9) (4.12.2)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from pandas>=2.2->meds_reader==0.1.9) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from pandas>=2.2->meds_reader==0.1.9) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from pandas>=2.2->meds_reader==0.1.9) (2024.2)\n", - "Requirement already satisfied: attrs>=22.2.0 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->meds_reader==0.1.9) (24.2.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->meds_reader==0.1.9) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->meds_reader==0.1.9) (0.35.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from jsonschema>=4.0.0->meds==0.3.3->meds_reader==0.1.9) (0.22.3)\n", - "Requirement already satisfied: six>=1.5 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas>=2.2->meds_reader==0.1.9) (1.17.0)\n", - "Downloading meds_reader-0.1.9-cp312-cp312-macosx_12_0_x86_64.whl (3.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.3/3.3 MB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hUsing cached numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl (20.3 MB)\n", - 
"Installing collected packages: numpy, meds_reader\n", - " Attempting uninstall: numpy\n", - " Found existing installation: numpy 2.2.0\n", - " Uninstalling numpy-2.2.0:\n", - " Successfully uninstalled numpy-2.2.0\n", - "Successfully installed meds_reader-0.1.9 numpy-1.26.4\n", - "Requirement already satisfied: setuptools in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (75.1.0)\n", + " Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)\n", + "Collecting pandas>=2.2 (from meds_reader==0.1.9)\n", + " Using cached pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)\n", + "Collecting jsonschema>=4.0.0 (from meds==0.3.3->meds_reader==0.1.9)\n", + " Using cached jsonschema-4.23.0-py3-none-any.whl.metadata (7.9 kB)\n", + "Requirement already satisfied: typing-extensions>=4.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from meds==0.3.3->meds_reader==0.1.9) (4.12.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from pandas>=2.2->meds_reader==0.1.9) (2.9.0.post0)\n", + "Collecting pytz>=2020.1 (from pandas>=2.2->meds_reader==0.1.9)\n", + " Using cached pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)\n", + "Collecting tzdata>=2022.7 (from pandas>=2.2->meds_reader==0.1.9)\n", + " Using cached tzdata-2024.2-py2.py3-none-any.whl.metadata (1.4 kB)\n", + "Collecting attrs>=22.2.0 (from jsonschema>=4.0.0->meds==0.3.3->meds_reader==0.1.9)\n", + " Using cached attrs-24.2.0-py3-none-any.whl.metadata (11 kB)\n", + "Collecting jsonschema-specifications>=2023.03.6 (from jsonschema>=4.0.0->meds==0.3.3->meds_reader==0.1.9)\n", + " Using cached jsonschema_specifications-2024.10.1-py3-none-any.whl.metadata (3.0 kB)\n", + "Collecting referencing>=0.28.4 (from jsonschema>=4.0.0->meds==0.3.3->meds_reader==0.1.9)\n", + " Using cached 
referencing-0.35.1-py3-none-any.whl.metadata (2.8 kB)\n", + "Collecting rpds-py>=0.7.1 (from jsonschema>=4.0.0->meds==0.3.3->meds_reader==0.1.9)\n", + " Downloading rpds_py-0.22.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)\n", + "Requirement already satisfied: six>=1.5 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas>=2.2->meds_reader==0.1.9) (1.17.0)\n", + "Downloading meds_reader-0.1.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.6/4.6 MB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hUsing cached meds-0.3.3-py3-none-any.whl (12 kB)\n", + "Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m18.3/18.3 MB\u001b[0m \u001b[31m87.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n", + "\u001b[?25hUsing cached pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)\n", + "Downloading pyarrow-18.1.0-cp311-cp311-manylinux_2_28_x86_64.whl (40.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.1/40.1 MB\u001b[0m \u001b[31m143.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hUsing cached jsonschema-4.23.0-py3-none-any.whl (88 kB)\n", + "Using cached pytz-2024.2-py2.py3-none-any.whl (508 kB)\n", + "Using cached tzdata-2024.2-py2.py3-none-any.whl (346 kB)\n", + "Using cached attrs-24.2.0-py3-none-any.whl (63 kB)\n", + "Using cached jsonschema_specifications-2024.10.1-py3-none-any.whl (18 kB)\n", + "Using cached referencing-0.35.1-py3-none-any.whl (26 kB)\n", + "Downloading rpds_py-0.22.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (381 kB)\n", + 
"Installing collected packages: pytz, tzdata, rpds-py, pyarrow, numpy, attrs, referencing, pandas, jsonschema-specifications, jsonschema, meds, meds_reader\n", + "Successfully installed attrs-24.2.0 jsonschema-4.23.0 jsonschema-specifications-2024.10.1 meds-0.3.3 meds_reader-0.1.9 numpy-1.26.4 pandas-2.2.3 pyarrow-18.1.0 pytz-2024.2 referencing-0.35.1 rpds-py-0.22.3 tzdata-2024.2\n", + "Requirement already satisfied: setuptools in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (75.1.0)\n", "Collecting cehrbert==1.3.1\n", - " Downloading cehrbert-1.3.1-py3-none-any.whl.metadata (9.9 kB)\n", + " Using cached cehrbert-1.3.1-py3-none-any.whl.metadata (9.9 kB)\n", "Collecting dask==2024.1.1 (from cehrbert==1.3.1)\n", - " Downloading dask-2024.1.1-py3-none-any.whl.metadata (3.7 kB)\n", + " Using cached dask-2024.1.1-py3-none-any.whl.metadata (3.7 kB)\n", "Collecting datasets==2.16.1 (from cehrbert==1.3.1)\n", - " Downloading datasets-2.16.1-py3-none-any.whl.metadata (20 kB)\n", + " Using cached datasets-2.16.1-py3-none-any.whl.metadata (20 kB)\n", "Collecting evaluate==0.4.1 (from cehrbert==1.3.1)\n", - " Downloading evaluate-0.4.1-py3-none-any.whl.metadata (9.4 kB)\n", + " Using cached evaluate-0.4.1-py3-none-any.whl.metadata (9.4 kB)\n", "Collecting fast-ml==3.68 (from cehrbert==1.3.1)\n", - " Downloading fast_ml-3.68-py3-none-any.whl.metadata (12 kB)\n", + " Using cached fast_ml-3.68-py3-none-any.whl.metadata (12 kB)\n", "Collecting femr==0.2.0 (from cehrbert==1.3.1)\n", - " Downloading femr-0.2.0-py3-none-any.whl.metadata (11 kB)\n", + " Using cached femr-0.2.0-py3-none-any.whl.metadata (11 kB)\n", "Collecting Jinja2==3.1.3 (from cehrbert==1.3.1)\n", - " Downloading Jinja2-3.1.3-py3-none-any.whl.metadata (3.3 kB)\n", - "Requirement already satisfied: meds==0.3.3 in /Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cehrbert==1.3.1) (0.3.3)\n", - "Requirement already satisfied: meds_reader==0.1.9 in 
/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages (from cehrbert==1.3.1) (0.1.9)\n", + " Using cached Jinja2-3.1.3-py3-none-any.whl.metadata (3.3 kB)\n", + "Requirement already satisfied: meds==0.3.3 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (0.3.3)\n", + "Requirement already satisfied: meds_reader==0.1.9 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (0.1.9)\n", "Collecting networkx==3.2.1 (from cehrbert==1.3.1)\n", - " Downloading networkx-3.2.1-py3-none-any.whl.metadata (5.2 kB)\n", + " Using cached networkx-3.2.1-py3-none-any.whl.metadata (5.2 kB)\n", "Collecting numpy==1.24.3 (from cehrbert==1.3.1)\n", - " Downloading numpy-1.24.3.tar.gz (10.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.9/10.9 MB\u001b[0m \u001b[31m30.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n", - "\u001b[?25h Installing build dependencies ... \u001b[?25ldone\n", - "\u001b[?25h Getting requirements to build wheel ... 
\u001b[?25lerror\n", - " \u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n", - " \n", - " \u001b[31m×\u001b[0m \u001b[32mGetting requirements to build wheel\u001b[0m did not run successfully.\n", - " \u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n", - " \u001b[31m╰─>\u001b[0m \u001b[31m[33 lines of output]\u001b[0m\n", - " \u001b[31m \u001b[0m Traceback (most recent call last):\n", - " \u001b[31m \u001b[0m File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py\", line 353, in \n", - " \u001b[31m \u001b[0m main()\n", - " \u001b[31m \u001b[0m File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py\", line 335, in main\n", - " \u001b[31m \u001b[0m json_out['return_val'] = hook(**hook_input['kwargs'])\n", - " \u001b[31m \u001b[0m ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " \u001b[31m \u001b[0m File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py\", line 112, in get_requires_for_build_wheel\n", - " \u001b[31m \u001b[0m backend = _build_backend()\n", - " \u001b[31m \u001b[0m ^^^^^^^^^^^^^^^^\n", - " \u001b[31m \u001b[0m File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py\", line 77, in _build_backend\n", - " \u001b[31m \u001b[0m obj = import_module(mod_path)\n", - " \u001b[31m \u001b[0m ^^^^^^^^^^^^^^^^^^^^^^^\n", - " \u001b[31m \u001b[0m File \"/Users/sim/miniconda3/envs/dev/lib/python3.12/importlib/__init__.py\", line 90, in import_module\n", - " \u001b[31m \u001b[0m return _bootstrap._gcd_import(name[level:], package, level)\n", - " \u001b[31m \u001b[0m ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " \u001b[31m \u001b[0m File \"\", line 1387, in _gcd_import\n", - " \u001b[31m \u001b[0m File \"\", line 1360, in _find_and_load\n", - " \u001b[31m \u001b[0m File 
\"\", line 1310, in _find_and_load_unlocked\n", - " \u001b[31m \u001b[0m File \"\", line 488, in _call_with_frames_removed\n", - " \u001b[31m \u001b[0m File \"\", line 1387, in _gcd_import\n", - " \u001b[31m \u001b[0m File \"\", line 1360, in _find_and_load\n", - " \u001b[31m \u001b[0m File \"\", line 1331, in _find_and_load_unlocked\n", - " \u001b[31m \u001b[0m File \"\", line 935, in _load_unlocked\n", - " \u001b[31m \u001b[0m File \"\", line 999, in exec_module\n", - " \u001b[31m \u001b[0m File \"\", line 488, in _call_with_frames_removed\n", - " \u001b[31m \u001b[0m File \"/private/var/folders/fm/gwbmjrfx6gxg6x7xxcxbnrth0000gn/T/pip-build-env-4n11flhi/overlay/lib/python3.12/site-packages/setuptools/__init__.py\", line 16, in \n", - " \u001b[31m \u001b[0m import setuptools.version\n", - " \u001b[31m \u001b[0m File \"/private/var/folders/fm/gwbmjrfx6gxg6x7xxcxbnrth0000gn/T/pip-build-env-4n11flhi/overlay/lib/python3.12/site-packages/setuptools/version.py\", line 1, in \n", - " \u001b[31m \u001b[0m import pkg_resources\n", - " \u001b[31m \u001b[0m File \"/private/var/folders/fm/gwbmjrfx6gxg6x7xxcxbnrth0000gn/T/pip-build-env-4n11flhi/overlay/lib/python3.12/site-packages/pkg_resources/__init__.py\", line 2172, in \n", - " \u001b[31m \u001b[0m register_finder(pkgutil.ImpImporter, find_on_path)\n", - " \u001b[31m \u001b[0m ^^^^^^^^^^^^^^^^^^^\n", - " \u001b[31m \u001b[0m AttributeError: module 'pkgutil' has no attribute 'ImpImporter'. 
Did you mean: 'zipimporter'?\n", - " \u001b[31m \u001b[0m \u001b[31m[end of output]\u001b[0m\n", - " \n", - " \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n", - "\u001b[?25h\u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n", - "\n", - "\u001b[31m×\u001b[0m \u001b[32mGetting requirements to build wheel\u001b[0m did not run successfully.\n", - "\u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n", - "\u001b[31m╰─>\u001b[0m See above for output.\n", - "\n", - "\u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n" + " Downloading numpy-1.24.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)\n", + "Collecting packaging==23.2 (from cehrbert==1.3.1)\n", + " Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)\n", + "Collecting pandas==2.2.0 (from cehrbert==1.3.1)\n", + " Downloading pandas-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)\n", + "Collecting peft==0.10.0 (from cehrbert==1.3.1)\n", + " Downloading peft-0.10.0-py3-none-any.whl.metadata (13 kB)\n", + "Collecting Pillow==10.3.0 (from cehrbert==1.3.1)\n", + " Downloading pillow-10.3.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.2 kB)\n", + "Collecting pyarrow==15.0.0 (from cehrbert==1.3.1)\n", + " Downloading pyarrow-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.0 kB)\n", + "Collecting pydantic==2.6.0 (from cehrbert==1.3.1)\n", + " Downloading pydantic-2.6.0-py3-none-any.whl.metadata (81 kB)\n", + "Collecting python-dateutil==2.8.2 (from cehrbert==1.3.1)\n", + " Downloading python_dateutil-2.8.2-py2.py3-none-any.whl.metadata (8.2 kB)\n", + "Collecting PyYAML==6.0.1 (from cehrbert==1.3.1)\n", + " Downloading PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB)\n", + "Collecting scikit-learn==1.4.0 (from cehrbert==1.3.1)\n", + " Downloading 
scikit_learn-1.4.0-1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n", + "Collecting scipy==1.12.0 (from cehrbert==1.3.1)\n", + " Downloading scipy-1.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)\n", + "Collecting tensorflow==2.15.0 (from cehrbert==1.3.1)\n", + " Downloading tensorflow-2.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)\n", + "Collecting tensorflow-datasets==4.5.2 (from cehrbert==1.3.1)\n", + " Downloading tensorflow_datasets-4.5.2-py3-none-any.whl.metadata (6.1 kB)\n", + "Collecting tqdm==4.66.1 (from cehrbert==1.3.1)\n", + " Downloading tqdm-4.66.1-py3-none-any.whl.metadata (57 kB)\n", + "Collecting torch==2.4.0 (from cehrbert==1.3.1)\n", + " Downloading torch-2.4.0-cp311-cp311-manylinux1_x86_64.whl.metadata (26 kB)\n", + "Collecting tokenizers==0.19.0 (from cehrbert==1.3.1)\n", + " Downloading tokenizers-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n", + "Collecting transformers==4.40.0 (from cehrbert==1.3.1)\n", + " Downloading transformers-4.40.0-py3-none-any.whl.metadata (137 kB)\n", + "Collecting accelerate==0.31.0 (from cehrbert==1.3.1)\n", + " Downloading accelerate-0.31.0-py3-none-any.whl.metadata (19 kB)\n", + "Collecting Werkzeug==3.0.1 (from cehrbert==1.3.1)\n", + " Downloading werkzeug-3.0.1-py3-none-any.whl.metadata (4.1 kB)\n", + "Collecting wandb==0.17.8 (from cehrbert==1.3.1)\n", + " Downloading wandb-0.17.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)\n", + "Collecting xgboost==2.0.3 (from cehrbert==1.3.1)\n", + " Downloading xgboost-2.0.3-py3-none-manylinux2014_x86_64.whl.metadata (2.0 kB)\n", + "Collecting cehrbert_data==0.0.4 (from cehrbert==1.3.1)\n", + " Downloading cehrbert_data-0.0.4-py3-none-any.whl.metadata (5.7 kB)\n", + "Requirement already satisfied: psutil in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from 
accelerate==0.31.0->cehrbert==1.3.1) (6.1.0)\n", + "Collecting huggingface-hub (from accelerate==0.31.0->cehrbert==1.3.1)\n", + " Using cached huggingface_hub-0.26.5-py3-none-any.whl.metadata (13 kB)\n", + "Collecting safetensors>=0.3.1 (from accelerate==0.31.0->cehrbert==1.3.1)\n", + " Using cached safetensors-0.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n", + "Collecting pyspark==3.1.2 (from cehrbert_data==0.0.4->cehrbert==1.3.1)\n", + " Downloading pyspark-3.1.2.tar.gz (212.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.4/212.4 MB\u001b[0m \u001b[31m80.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25ldone\n", + "\u001b[?25hCollecting click>=8.1 (from dask==2024.1.1->cehrbert==1.3.1)\n", + " Using cached click-8.1.7-py3-none-any.whl.metadata (3.0 kB)\n", + "Collecting cloudpickle>=1.5.0 (from dask==2024.1.1->cehrbert==1.3.1)\n", + " Using cached cloudpickle-3.1.0-py3-none-any.whl.metadata (7.0 kB)\n", + "Collecting fsspec>=2021.09.0 (from dask==2024.1.1->cehrbert==1.3.1)\n", + " Using cached fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB)\n", + "Collecting partd>=1.2.0 (from dask==2024.1.1->cehrbert==1.3.1)\n", + " Downloading partd-1.4.2-py3-none-any.whl.metadata (4.6 kB)\n", + "Collecting toolz>=0.10.0 (from dask==2024.1.1->cehrbert==1.3.1)\n", + " Downloading toolz-1.0.0-py3-none-any.whl.metadata (5.1 kB)\n", + "Collecting importlib-metadata>=4.13.0 (from dask==2024.1.1->cehrbert==1.3.1)\n", + " Downloading importlib_metadata-8.5.0-py3-none-any.whl.metadata (4.8 kB)\n", + "Collecting filelock (from datasets==2.16.1->cehrbert==1.3.1)\n", + " Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB)\n", + "Collecting pyarrow-hotfix (from datasets==2.16.1->cehrbert==1.3.1)\n", + " Downloading pyarrow_hotfix-0.6-py3-none-any.whl.metadata (3.6 kB)\n", + "Collecting dill<0.3.8,>=0.3.0 
(from datasets==2.16.1->cehrbert==1.3.1)\n", + " Downloading dill-0.3.7-py3-none-any.whl.metadata (9.9 kB)\n", + "Collecting requests>=2.19.0 (from datasets==2.16.1->cehrbert==1.3.1)\n", + " Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)\n", + "Collecting xxhash (from datasets==2.16.1->cehrbert==1.3.1)\n", + " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", + "Collecting multiprocess (from datasets==2.16.1->cehrbert==1.3.1)\n", + " Downloading multiprocess-0.70.17-py311-none-any.whl.metadata (7.2 kB)\n", + "Collecting fsspec>=2021.09.0 (from dask==2024.1.1->cehrbert==1.3.1)\n", + " Downloading fsspec-2023.10.0-py3-none-any.whl.metadata (6.8 kB)\n", + "Collecting aiohttp (from datasets==2.16.1->cehrbert==1.3.1)\n", + " Downloading aiohttp-3.11.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)\n", + "Collecting responses<0.19 (from evaluate==0.4.1->cehrbert==1.3.1)\n", + " Downloading responses-0.18.0-py3-none-any.whl.metadata (29 kB)\n", + "Collecting zstandard>=0.18 (from femr==0.2.0->cehrbert==1.3.1)\n", + " Downloading zstandard-0.23.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)\n", + "Collecting icecream==2.1.3 (from femr==0.2.0->cehrbert==1.3.1)\n", + " Downloading icecream-2.1.3-py2.py3-none-any.whl.metadata (1.4 kB)\n", + "Collecting nptyping==2.4.1 (from femr==0.2.0->cehrbert==1.3.1)\n", + " Downloading nptyping-2.4.1-py3-none-any.whl.metadata (7.7 kB)\n", + "Collecting msgpack>=1.0.5 (from femr==0.2.0->cehrbert==1.3.1)\n", + " Downloading msgpack-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)\n", + "Collecting MarkupSafe>=2.0 (from Jinja2==3.1.3->cehrbert==1.3.1)\n", + " Using cached MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)\n", + "Requirement already satisfied: jsonschema>=4.0.0 in 
/storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from meds==0.3.3->cehrbert==1.3.1) (4.23.0)\n", + "Requirement already satisfied: typing-extensions>=4.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from meds==0.3.3->cehrbert==1.3.1) (4.12.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from pandas==2.2.0->cehrbert==1.3.1) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from pandas==2.2.0->cehrbert==1.3.1) (2024.2)\n", + "Collecting annotated-types>=0.4.0 (from pydantic==2.6.0->cehrbert==1.3.1)\n", + " Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n", + "Collecting pydantic-core==2.16.1 (from pydantic==2.6.0->cehrbert==1.3.1)\n", + " Downloading pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.5 kB)\n", + "Requirement already satisfied: six>=1.5 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from python-dateutil==2.8.2->cehrbert==1.3.1) (1.17.0)\n", + "Collecting joblib>=1.2.0 (from scikit-learn==1.4.0->cehrbert==1.3.1)\n", + " Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)\n", + "Collecting threadpoolctl>=2.0.0 (from scikit-learn==1.4.0->cehrbert==1.3.1)\n", + " Using cached threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)\n", + "Collecting absl-py>=1.0.0 (from tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Using cached absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)\n", + "Collecting astunparse>=1.6.0 (from tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)\n", + "Collecting flatbuffers>=23.5.26 (from tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)\n", + "Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from 
tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)\n", + "Collecting google-pasta>=0.1.1 (from tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)\n", + "Collecting h5py>=2.9.0 (from tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading h5py-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.5 kB)\n", + "Collecting libclang>=13.0.0 (from tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)\n", + "Collecting ml-dtypes~=0.2.0 (from tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading ml_dtypes-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)\n", + "Collecting opt-einsum>=2.3.2 (from tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)\n", + "Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 (from tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading protobuf-4.25.5-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)\n", + "Requirement already satisfied: setuptools in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (75.1.0)\n", + "Collecting termcolor>=1.1.0 (from tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Using cached termcolor-2.5.0-py3-none-any.whl.metadata (6.1 kB)\n", + "Collecting wrapt<1.15,>=1.11.0 (from tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n", + "Collecting tensorflow-io-gcs-filesystem>=0.23.1 (from tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading tensorflow_io_gcs_filesystem-0.37.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (14 kB)\n", + "Collecting 
grpcio<2.0,>=1.24.3 (from tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading grpcio-1.68.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB)\n", + "Collecting tensorboard<2.16,>=2.15 (from tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading tensorboard-2.15.2-py3-none-any.whl.metadata (1.7 kB)\n", + "Collecting tensorflow-estimator<2.16,>=2.15.0 (from tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading tensorflow_estimator-2.15.0-py2.py3-none-any.whl.metadata (1.3 kB)\n", + "Collecting keras<2.16,>=2.15.0 (from tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading keras-2.15.0-py3-none-any.whl.metadata (2.4 kB)\n", + "Collecting promise (from tensorflow-datasets==4.5.2->cehrbert==1.3.1)\n", + " Downloading promise-2.3.tar.gz (19 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25ldone\n", + "\u001b[?25hCollecting tensorflow-metadata (from tensorflow-datasets==4.5.2->cehrbert==1.3.1)\n", + " Downloading tensorflow_metadata-1.16.1-py3-none-any.whl.metadata (2.4 kB)\n", + "Collecting sympy (from torch==2.4.0->cehrbert==1.3.1)\n", + " Using cached sympy-1.13.3-py3-none-any.whl.metadata (12 kB)\n", + "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.4.0->cehrbert==1.3.1)\n", + " Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.4.0->cehrbert==1.3.1)\n", + " Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.4.0->cehrbert==1.3.1)\n", + " Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.4.0->cehrbert==1.3.1)\n", + " Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cublas-cu12==12.1.3.1 (from 
torch==2.4.0->cehrbert==1.3.1)\n", + " Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cufft-cu12==11.0.2.54 (from torch==2.4.0->cehrbert==1.3.1)\n", + " Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-curand-cu12==10.3.2.106 (from torch==2.4.0->cehrbert==1.3.1)\n", + " Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch==2.4.0->cehrbert==1.3.1)\n", + " Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch==2.4.0->cehrbert==1.3.1)\n", + " Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-nccl-cu12==2.20.5 (from torch==2.4.0->cehrbert==1.3.1)\n", + " Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl.metadata (1.8 kB)\n", + "Collecting nvidia-nvtx-cu12==12.1.105 (from torch==2.4.0->cehrbert==1.3.1)\n", + " Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.7 kB)\n", + "Collecting triton==3.0.0 (from torch==2.4.0->cehrbert==1.3.1)\n", + " Downloading triton-3.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)\n", + "Collecting regex!=2019.12.17 (from transformers==4.40.0->cehrbert==1.3.1)\n", + " Downloading regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)\n", + "Collecting docker-pycreds>=0.4.0 (from wandb==0.17.8->cehrbert==1.3.1)\n", + " Using cached docker_pycreds-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)\n", + "Collecting gitpython!=3.1.29,>=1.0.0 (from wandb==0.17.8->cehrbert==1.3.1)\n", + " Using cached GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)\n", + "Requirement already satisfied: platformdirs in 
/storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from wandb==0.17.8->cehrbert==1.3.1) (4.3.6)\n", + "Collecting sentry-sdk>=1.0.0 (from wandb==0.17.8->cehrbert==1.3.1)\n", + " Using cached sentry_sdk-2.19.2-py2.py3-none-any.whl.metadata (9.9 kB)\n", + "Collecting setproctitle (from wandb==0.17.8->cehrbert==1.3.1)\n", + " Downloading setproctitle-1.3.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)\n", + "Collecting colorama>=0.3.9 (from icecream==2.1.3->femr==0.2.0->cehrbert==1.3.1)\n", + " Using cached colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)\n", + "Requirement already satisfied: pygments>=2.2.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from icecream==2.1.3->femr==0.2.0->cehrbert==1.3.1) (2.18.0)\n", + "Requirement already satisfied: executing>=0.3.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from icecream==2.1.3->femr==0.2.0->cehrbert==1.3.1) (2.1.0)\n", + "Requirement already satisfied: asttokens>=2.0.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from icecream==2.1.3->femr==0.2.0->cehrbert==1.3.1) (3.0.0)\n", + "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch==2.4.0->cehrbert==1.3.1)\n", + " Downloading nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.5 kB)\n", + "Collecting py4j==0.10.9 (from pyspark==3.1.2->cehrbert_data==0.0.4->cehrbert==1.3.1)\n", + " Downloading py4j-0.10.9-py2.py3-none-any.whl.metadata (1.3 kB)\n", + "Requirement already satisfied: wheel<1.0,>=0.23.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from astunparse>=1.6.0->tensorflow==2.15.0->cehrbert==1.3.1) (0.44.0)\n", + "Collecting aiohappyeyeballs>=2.3.0 (from aiohttp->datasets==2.16.1->cehrbert==1.3.1)\n", + " Using cached aiohappyeyeballs-2.4.4-py3-none-any.whl.metadata (6.1 kB)\n", + 
"Collecting aiosignal>=1.1.2 (from aiohttp->datasets==2.16.1->cehrbert==1.3.1)\n", + " Using cached aiosignal-1.3.2-py2.py3-none-any.whl.metadata (3.8 kB)\n", + "Requirement already satisfied: attrs>=17.3.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from aiohttp->datasets==2.16.1->cehrbert==1.3.1) (24.2.0)\n", + "Collecting frozenlist>=1.1.1 (from aiohttp->datasets==2.16.1->cehrbert==1.3.1)\n", + " Downloading frozenlist-1.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)\n", + "Collecting multidict<7.0,>=4.5 (from aiohttp->datasets==2.16.1->cehrbert==1.3.1)\n", + " Downloading multidict-6.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.0 kB)\n", + "Collecting propcache>=0.2.0 (from aiohttp->datasets==2.16.1->cehrbert==1.3.1)\n", + " Downloading propcache-0.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.2 kB)\n", + "Collecting yarl<2.0,>=1.17.0 (from aiohttp->datasets==2.16.1->cehrbert==1.3.1)\n", + " Downloading yarl-1.18.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (69 kB)\n", + "Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.29,>=1.0.0->wandb==0.17.8->cehrbert==1.3.1)\n", + " Using cached gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)\n", + "Collecting zipp>=3.20 (from importlib-metadata>=4.13.0->dask==2024.1.1->cehrbert==1.3.1)\n", + " Downloading zipp-3.21.0-py3-none-any.whl.metadata (3.7 kB)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from jsonschema>=4.0.0->meds==0.3.3->cehrbert==1.3.1) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from jsonschema>=4.0.0->meds==0.3.3->cehrbert==1.3.1) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in 
/storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from jsonschema>=4.0.0->meds==0.3.3->cehrbert==1.3.1) (0.22.3)\n", + "Collecting locket (from partd>=1.2.0->dask==2024.1.1->cehrbert==1.3.1)\n", + " Downloading locket-1.0.0-py2.py3-none-any.whl.metadata (2.8 kB)\n", + "Collecting charset-normalizer<4,>=2 (from requests>=2.19.0->datasets==2.16.1->cehrbert==1.3.1)\n", + " Using cached charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (34 kB)\n", + "Collecting idna<4,>=2.5 (from requests>=2.19.0->datasets==2.16.1->cehrbert==1.3.1)\n", + " Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)\n", + "Collecting urllib3<3,>=1.21.1 (from requests>=2.19.0->datasets==2.16.1->cehrbert==1.3.1)\n", + " Using cached urllib3-2.2.3-py3-none-any.whl.metadata (6.5 kB)\n", + "Collecting certifi>=2017.4.17 (from requests>=2.19.0->datasets==2.16.1->cehrbert==1.3.1)\n", + " Using cached certifi-2024.12.14-py3-none-any.whl.metadata (2.3 kB)\n", + "Collecting google-auth<3,>=1.6.3 (from tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading google_auth-2.37.0-py2.py3-none-any.whl.metadata (4.8 kB)\n", + "Collecting google-auth-oauthlib<2,>=0.5 (from tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading google_auth_oauthlib-1.2.1-py2.py3-none-any.whl.metadata (2.7 kB)\n", + "Collecting markdown>=2.6.8 (from tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Using cached Markdown-3.7-py3-none-any.whl.metadata (7.0 kB)\n", + "Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Using cached tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl.metadata (1.1 kB)\n", + "INFO: pip is looking at multiple versions of multiprocess to determine which version is compatible with other requirements. 
This could take a while.\n", + "Collecting multiprocess (from datasets==2.16.1->cehrbert==1.3.1)\n", + " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", + " Downloading multiprocess-0.70.15-py311-none-any.whl.metadata (7.2 kB)\n", + "Collecting mpmath<1.4,>=1.1.0 (from sympy->torch==2.4.0->cehrbert==1.3.1)\n", + " Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)\n", + "Collecting googleapis-common-protos<2,>=1.56.4 (from tensorflow-metadata->tensorflow-datasets==4.5.2->cehrbert==1.3.1)\n", + " Downloading googleapis_common_protos-1.66.0-py2.py3-none-any.whl.metadata (1.5 kB)\n", + "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb==0.17.8->cehrbert==1.3.1)\n", + " Using cached smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)\n", + "Collecting cachetools<6.0,>=2.0.0 (from google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading cachetools-5.5.0-py3-none-any.whl.metadata (5.3 kB)\n", + "Collecting pyasn1-modules>=0.2.1 (from google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading pyasn1_modules-0.4.1-py3-none-any.whl.metadata (3.5 kB)\n", + "Collecting rsa<5,>=3.1.4 (from google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading rsa-4.9-py3-none-any.whl.metadata (4.2 kB)\n", + "Collecting requests-oauthlib>=0.7.0 (from google-auth-oauthlib<2,>=0.5->tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading requests_oauthlib-2.0.0-py2.py3-none-any.whl.metadata (11 kB)\n", + "Collecting pyasn1<0.7.0,>=0.4.6 (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading pyasn1-0.6.1-py3-none-any.whl.metadata (8.4 kB)\n", + "Collecting oauthlib>=3.0.0 (from 
requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1)\n", + " Downloading oauthlib-3.2.2-py3-none-any.whl.metadata (7.5 kB)\n", + "Downloading cehrbert-1.3.1-py3-none-any.whl (139 kB)\n", + "Downloading accelerate-0.31.0-py3-none-any.whl (309 kB)\n", + "Downloading cehrbert_data-0.0.4-py3-none-any.whl (77 kB)\n", + "Downloading dask-2024.1.1-py3-none-any.whl (1.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m86.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading datasets-2.16.1-py3-none-any.whl (507 kB)\n", + "Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)\n", + "Downloading fast_ml-3.68-py3-none-any.whl (42 kB)\n", + "Downloading femr-0.2.0-py3-none-any.whl (84 kB)\n", + "Downloading Jinja2-3.1.3-py3-none-any.whl (133 kB)\n", + "Downloading networkx-3.2.1-py3-none-any.whl (1.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m134.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading numpy-1.24.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m143.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading packaging-23.2-py3-none-any.whl (53 kB)\n", + "Downloading pandas-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.0/13.0 MB\u001b[0m \u001b[31m147.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading peft-0.10.0-py3-none-any.whl (199 kB)\n", + "Downloading pillow-10.3.0-cp311-cp311-manylinux_2_28_x86_64.whl (4.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 
MB\u001b[0m \u001b[31m135.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pyarrow-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl (38.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.3/38.3 MB\u001b[0m \u001b[31m108.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading pydantic-2.6.0-py3-none-any.whl (394 kB)\n", + "Downloading python_dateutil-2.8.2-py2.py3-none-any.whl (247 kB)\n", + "Downloading PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (757 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m757.7/757.7 kB\u001b[0m \u001b[31m71.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading scikit_learn-1.4.0-1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.1/12.1 MB\u001b[0m \u001b[31m93.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading scipy-1.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.4/38.4 MB\u001b[0m \u001b[31m134.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading tensorflow-2.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (475.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m475.3/475.3 MB\u001b[0m \u001b[31m71.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading tensorflow_datasets-4.5.2-py3-none-any.whl (4.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.2/4.2 MB\u001b[0m \u001b[31m49.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading 
tokenizers-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m107.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading torch-2.4.0-cp311-cp311-manylinux1_x86_64.whl (797.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m797.3/797.3 MB\u001b[0m \u001b[31m69.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading tqdm-4.66.1-py3-none-any.whl (78 kB)\n", + "Downloading transformers-4.40.0-py3-none-any.whl (9.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.0/9.0 MB\u001b[0m \u001b[31m156.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading wandb-0.17.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.4/9.4 MB\u001b[0m \u001b[31m76.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading werkzeug-3.0.1-py3-none-any.whl (226 kB)\n", + "Downloading xgboost-2.0.3-py3-none-manylinux2014_x86_64.whl (297.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m297.1/297.1 MB\u001b[0m \u001b[31m100.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading icecream-2.1.3-py2.py3-none-any.whl (8.4 kB)\n", + "Downloading nptyping-2.4.1-py3-none-any.whl (36 kB)\n", + "Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n", + "Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n", + "Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n", + "Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n", + "Using cached 
nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", + "Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n", + "Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n", + "Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n", + "Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n", + "Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n", + "Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n", + "Downloading pydantic_core-2.16.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m111.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading triton-3.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (209.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m209.4/209.4 MB\u001b[0m \u001b[31m117.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading py4j-0.10.9-py2.py3-none-any.whl (198 kB)\n", + "Using cached absl_py-2.1.0-py3-none-any.whl (133 kB)\n", + "Using cached annotated_types-0.7.0-py3-none-any.whl (13 kB)\n", + "Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)\n", + "Using cached click-8.1.7-py3-none-any.whl (97 kB)\n", + "Using cached cloudpickle-3.1.0-py3-none-any.whl (22 kB)\n", + "Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n", + "Using cached docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n", + "Downloading flatbuffers-24.3.25-py2.py3-none-any.whl (26 kB)\n", + "Downloading fsspec-2023.10.0-py3-none-any.whl (166 kB)\n", + "Downloading aiohttp-3.11.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m131.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading gast-0.6.0-py3-none-any.whl (21 kB)\n", + "Using cached GitPython-3.1.43-py3-none-any.whl (207 kB)\n", + "Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)\n", + "Downloading grpcio-1.68.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m209.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading h5py-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m139.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached huggingface_hub-0.26.5-py3-none-any.whl (447 kB)\n", + "Downloading importlib_metadata-8.5.0-py3-none-any.whl (26 kB)\n", + "Using cached joblib-1.4.2-py3-none-any.whl (301 kB)\n", + "Downloading keras-2.15.0-py3-none-any.whl (1.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m133.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl (24.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.5/24.5 MB\u001b[0m \u001b[31m201.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (23 kB)\n", + "Downloading ml_dtypes-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m125.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + 
"\u001b[?25hDownloading msgpack-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (403 kB)\n", + "Downloading opt_einsum-3.4.0-py3-none-any.whl (71 kB)\n", + "Downloading partd-1.4.2-py3-none-any.whl (18 kB)\n", + "Downloading protobuf-4.25.5-cp37-abi3-manylinux2014_x86_64.whl (294 kB)\n", + "Downloading regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (792 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m792.7/792.7 kB\u001b[0m \u001b[31m79.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached requests-2.32.3-py3-none-any.whl (64 kB)\n", + "Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", + "Using cached safetensors-0.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (435 kB)\n", + "Using cached sentry_sdk-2.19.2-py2.py3-none-any.whl (322 kB)\n", + "Downloading tensorboard-2.15.2-py3-none-any.whl (5.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m166.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tensorflow_estimator-2.15.0-py2.py3-none-any.whl (441 kB)\n", + "Downloading tensorflow_io_gcs_filesystem-0.37.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.1/5.1 MB\u001b[0m \u001b[31m112.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached termcolor-2.5.0-py3-none-any.whl (7.8 kB)\n", + "Using cached threadpoolctl-3.5.0-py3-none-any.whl (18 kB)\n", + "Downloading toolz-1.0.0-py3-none-any.whl (56 kB)\n", + "Downloading wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (78 kB)\n", + "Downloading zstandard-0.23.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.4/5.4 MB\u001b[0m \u001b[31m165.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached filelock-3.16.1-py3-none-any.whl (16 kB)\n", + "Downloading multiprocess-0.70.15-py311-none-any.whl (135 kB)\n", + "Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n", + "Downloading setproctitle-1.3.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (31 kB)\n", + "Using cached sympy-1.13.3-py3-none-any.whl (6.2 MB)\n", + "Downloading tensorflow_metadata-1.16.1-py3-none-any.whl (28 kB)\n", + "Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", + "Using cached aiohappyeyeballs-2.4.4-py3-none-any.whl (14 kB)\n", + "Using cached aiosignal-1.3.2-py2.py3-none-any.whl (7.6 kB)\n", + "Using cached certifi-2024.12.14-py3-none-any.whl (164 kB)\n", + "Using cached charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (142 kB)\n", + "Using cached colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n", + "Downloading frozenlist-1.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (274 kB)\n", + "Using cached gitdb-4.0.11-py3-none-any.whl (62 kB)\n", + "Downloading google_auth-2.37.0-py2.py3-none-any.whl (209 kB)\n", + "Downloading google_auth_oauthlib-1.2.1-py2.py3-none-any.whl (24 kB)\n", + "Downloading googleapis_common_protos-1.66.0-py2.py3-none-any.whl (221 kB)\n", + "Using cached idna-3.10-py3-none-any.whl (70 kB)\n", + "Using cached Markdown-3.7-py3-none-any.whl (106 kB)\n", + "Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)\n", + "Downloading multidict-6.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n", + "Downloading propcache-0.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (231 kB)\n", + "Using cached tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl (6.6 
MB)\n", + "Using cached urllib3-2.2.3-py3-none-any.whl (126 kB)\n", + "Downloading yarl-1.18.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (344 kB)\n", + "Downloading zipp-3.21.0-py3-none-any.whl (9.6 kB)\n", + "Downloading locket-1.0.0-py2.py3-none-any.whl (4.4 kB)\n", + "Downloading nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (19.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.7/19.7 MB\u001b[0m \u001b[31m226.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading cachetools-5.5.0-py3-none-any.whl (9.5 kB)\n", + "Downloading pyasn1_modules-0.4.1-py3-none-any.whl (181 kB)\n", + "Downloading requests_oauthlib-2.0.0-py2.py3-none-any.whl (24 kB)\n", + "Downloading rsa-4.9-py3-none-any.whl (34 kB)\n", + "Using cached smmap-5.0.1-py3-none-any.whl (24 kB)\n", + "Downloading oauthlib-3.2.2-py3-none-any.whl (151 kB)\n", + "Downloading pyasn1-0.6.1-py3-none-any.whl (83 kB)\n", + "Building wheels for collected packages: pyspark, promise\n", + " Building wheel for pyspark (setup.py) ... \u001b[?25ldone\n", + "\u001b[?25h Created wheel for pyspark: filename=pyspark-3.1.2-py2.py3-none-any.whl size=212880746 sha256=cb39c2c9d3ea7a37b23b68fe02520cffab2313593b1757452eb1b95f77a305c7\n", + " Stored in directory: /storage/nassim/.cache/pip/wheels/9c/3c/bc/93eb7c1c3c6438508389e26e46dfe3ffa238d163d20a44f9de\n", + " Building wheel for promise (setup.py) ... 
\u001b[?25ldone\n", + "\u001b[?25h Created wheel for promise: filename=promise-2.3-py3-none-any.whl size=21483 sha256=bca34bdad6c19dccb64518cda5223a0e4ed34500696fc418ab668e267ed2e3c4\n", + " Stored in directory: /storage/nassim/.cache/pip/wheels/90/74/b1/9b54c896b8d9409e9268329d4d45ede8a8040abe91c8879932\n", + "Successfully built pyspark promise\n", + "Installing collected packages: py4j, mpmath, libclang, flatbuffers, zstandard, zipp, xxhash, wrapt, urllib3, tqdm, toolz, threadpoolctl, termcolor, tensorflow-io-gcs-filesystem, tensorflow-estimator, tensorboard-data-server, sympy, smmap, setproctitle, safetensors, regex, PyYAML, python-dateutil, pyspark, pydantic-core, pyasn1, pyarrow-hotfix, protobuf, propcache, promise, Pillow, packaging, opt-einsum, oauthlib, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, numpy, networkx, multidict, msgpack, MarkupSafe, markdown, locket, keras, joblib, idna, grpcio, google-pasta, gast, fsspec, frozenlist, filelock, fast-ml, docker-pycreds, dill, colorama, cloudpickle, click, charset-normalizer, certifi, cachetools, astunparse, annotated-types, aiohappyeyeballs, absl-py, yarl, Werkzeug, triton, sentry-sdk, scipy, rsa, requests, pydantic, pyasn1-modules, pyarrow, partd, pandas, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nptyping, multiprocess, ml-dtypes, Jinja2, importlib-metadata, icecream, h5py, googleapis-common-protos, gitdb, aiosignal, xgboost, tensorflow-metadata, scikit-learn, responses, requests-oauthlib, nvidia-cusolver-cu12, huggingface-hub, google-auth, gitpython, dask, cehrbert_data, aiohttp, wandb, torch, tokenizers, tensorflow-datasets, google-auth-oauthlib, femr, transformers, tensorboard, datasets, accelerate, tensorflow, peft, evaluate, cehrbert\n", + " Attempting uninstall: python-dateutil\n", + " Found existing installation: python-dateutil 2.9.0.post0\n", + " Uninstalling 
python-dateutil-2.9.0.post0:\n", + " Successfully uninstalled python-dateutil-2.9.0.post0\n", + " Attempting uninstall: packaging\n", + " Found existing installation: packaging 24.2\n", + " Uninstalling packaging-24.2:\n", + " Successfully uninstalled packaging-24.2\n", + " Attempting uninstall: numpy\n", + " Found existing installation: numpy 1.26.4\n", + " Uninstalling numpy-1.26.4:\n", + " Successfully uninstalled numpy-1.26.4\n", + " Attempting uninstall: pyarrow\n", + " Found existing installation: pyarrow 18.1.0\n", + " Uninstalling pyarrow-18.1.0:\n", + " Successfully uninstalled pyarrow-18.1.0\n", + " Attempting uninstall: pandas\n", + " Found existing installation: pandas 2.2.3\n", + " Uninstalling pandas-2.2.3:\n", + " Successfully uninstalled pandas-2.2.3\n", + "Successfully installed Jinja2-3.1.3 MarkupSafe-3.0.2 Pillow-10.3.0 PyYAML-6.0.1 Werkzeug-3.0.1 absl-py-2.1.0 accelerate-0.31.0 aiohappyeyeballs-2.4.4 aiohttp-3.11.10 aiosignal-1.3.2 annotated-types-0.7.0 astunparse-1.6.3 cachetools-5.5.0 cehrbert-1.3.1 cehrbert_data-0.0.4 certifi-2024.12.14 charset-normalizer-3.4.0 click-8.1.7 cloudpickle-3.1.0 colorama-0.4.6 dask-2024.1.1 datasets-2.16.1 dill-0.3.7 docker-pycreds-0.4.0 evaluate-0.4.1 fast-ml-3.68 femr-0.2.0 filelock-3.16.1 flatbuffers-24.3.25 frozenlist-1.5.0 fsspec-2023.10.0 gast-0.6.0 gitdb-4.0.11 gitpython-3.1.43 google-auth-2.37.0 google-auth-oauthlib-1.2.1 google-pasta-0.2.0 googleapis-common-protos-1.66.0 grpcio-1.68.1 h5py-3.12.1 huggingface-hub-0.26.5 icecream-2.1.3 idna-3.10 importlib-metadata-8.5.0 joblib-1.4.2 keras-2.15.0 libclang-18.1.1 locket-1.0.0 markdown-3.7 ml-dtypes-0.2.0 mpmath-1.3.0 msgpack-1.1.0 multidict-6.1.0 multiprocess-0.70.15 networkx-3.2.1 nptyping-2.4.1 numpy-1.24.3 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 
nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.6.85 nvidia-nvtx-cu12-12.1.105 oauthlib-3.2.2 opt-einsum-3.4.0 packaging-23.2 pandas-2.2.0 partd-1.4.2 peft-0.10.0 promise-2.3 propcache-0.2.1 protobuf-4.25.5 py4j-0.10.9 pyarrow-15.0.0 pyarrow-hotfix-0.6 pyasn1-0.6.1 pyasn1-modules-0.4.1 pydantic-2.6.0 pydantic-core-2.16.1 pyspark-3.1.2 python-dateutil-2.8.2 regex-2024.11.6 requests-2.32.3 requests-oauthlib-2.0.0 responses-0.18.0 rsa-4.9 safetensors-0.4.5 scikit-learn-1.4.0 scipy-1.12.0 sentry-sdk-2.19.2 setproctitle-1.3.4 smmap-5.0.1 sympy-1.13.3 tensorboard-2.15.2 tensorboard-data-server-0.7.2 tensorflow-2.15.0 tensorflow-datasets-4.5.2 tensorflow-estimator-2.15.0 tensorflow-io-gcs-filesystem-0.37.1 tensorflow-metadata-1.16.1 termcolor-2.5.0 threadpoolctl-3.5.0 tokenizers-0.19.0 toolz-1.0.0 torch-2.4.0 tqdm-4.66.1 transformers-4.40.0 triton-3.0.0 urllib3-2.2.3 wandb-0.17.8 wrapt-1.14.1 xgboost-2.0.3 xxhash-3.5.0 yarl-1.18.3 zipp-3.21.0 zstandard-0.23.0\n" ] } ], @@ -129,7 +526,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 12, "id": "ff2f8638", "metadata": {}, "outputs": [], @@ -156,9 +553,9 @@ "TASK_DIR = MEDS_DIR + \"/task_labels\"\n", "TASK_NAME=\"mortality/in_icu/first_24h\"\n", "# TASK_NAME=\"los_in_hospital_first_48h\"\n", - "OUTPUT_PRETRAIN_MODEL_DIR= ROOT_DIR + \"/output/cehrbert/\"\n", + "OUTPUT_PRETRAIN_MODEL_DIR= ROOT_DIR + \"/output/cehrbert/train/\"\n", "# TODO this variable has an identical name?\n", - "OUTPUT_PRETRAIN_MODEL_DIR= ROOT_DIR + \"/output/cehrbert_finetuned/\"" + "OUTPUT_FINETUNE_MODEL_DIR= ROOT_DIR + \"/output/cehrbert/finetuned/\"" ] }, { @@ -181,24 +578,193 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "26f1edfb", "metadata": {}, "outputs": [], "source": [ - "!mkdir -p ./content/output/cehrbert/\n", - "!mkdir -p ./content/output/cehrbert_dataset_prepared/\n", - "!mkdir -p ./content/output/cehrbert_finetuned/" + "!mkdir -p 
{ROOT_DIR}/output/cehrbert/\n", + "!mkdir -p {ROOT_DIR}/output/cehrbert_dataset_prepared/\n", + "!mkdir -p {ROOT_DIR}/output/cehrbert_finetuned/" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "17c4af8d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cloning into 'cehrbert'...\n", + "remote: Enumerating objects: 8757, done.\u001b[K\n", + "remote: Counting objects: 100% (1428/1428), done.\u001b[K\n", + "remote: Compressing objects: 100% (604/604), done.\u001b[K\n", + "remote: Total 8757 (delta 934), reused 943 (delta 767), pack-reused 7329 (from 1)\u001b[K\n", + "Receiving objects: 100% (8757/8757), 14.23 MiB | 25.66 MiB/s, done.\n", + "Resolving deltas: 100% (6174/6174), done.\n", + "error: pathspec 'fix/meds_evaluation' did not match any file(s) known to git\n", + "Processing /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo/github_repo/cehrbert\n", + " Installing build dependencies ... \u001b[?25ldone\n", + "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", + "\u001b[?25h Preparing metadata (pyproject.toml) ... 
\u001b[?25ldone\n", + "\u001b[?25hRequirement already satisfied: dask==2024.1.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (2024.1.1)\n", + "Requirement already satisfied: datasets==2.16.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (2.16.1)\n", + "Requirement already satisfied: evaluate==0.4.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (0.4.1)\n", + "Requirement already satisfied: fast-ml==3.68 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (3.68)\n", + "Requirement already satisfied: femr==0.2.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (0.2.0)\n", + "Requirement already satisfied: Jinja2==3.1.3 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (3.1.3)\n", + "Requirement already satisfied: meds==0.3.3 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (0.3.3)\n", + "Requirement already satisfied: meds_reader==0.1.9 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (0.1.9)\n", + "Requirement already satisfied: networkx==3.2.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (3.2.1)\n", + "Requirement already satisfied: numpy==1.24.3 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (1.24.3)\n", + "Requirement already satisfied: packaging==23.2 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (23.2)\n", + "Requirement already satisfied: pandas==2.2.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (2.2.0)\n", + "Requirement already satisfied: peft==0.10.0 in 
/storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (0.10.0)\n", + "Requirement already satisfied: Pillow==10.3.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (10.3.0)\n", + "Requirement already satisfied: pyarrow==15.0.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (15.0.0)\n", + "Requirement already satisfied: pydantic==2.6.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (2.6.0)\n", + "Requirement already satisfied: python-dateutil==2.8.2 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (2.8.2)\n", + "Requirement already satisfied: PyYAML==6.0.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (6.0.1)\n", + "Requirement already satisfied: scikit-learn==1.4.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (1.4.0)\n", + "Requirement already satisfied: scipy==1.12.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (1.12.0)\n", + "Requirement already satisfied: tensorflow==2.15.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (2.15.0)\n", + "Requirement already satisfied: tensorflow-datasets==4.5.2 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (4.5.2)\n", + "Requirement already satisfied: tqdm==4.66.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (4.66.1)\n", + "Requirement already satisfied: torch==2.4.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (2.4.0)\n", + "Requirement already satisfied: tokenizers==0.19.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from 
cehrbert==1.3.1) (0.19.0)\n", + "Requirement already satisfied: transformers==4.40.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (4.40.0)\n", + "Requirement already satisfied: accelerate==0.31.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (0.31.0)\n", + "Requirement already satisfied: Werkzeug==3.0.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (3.0.1)\n", + "Requirement already satisfied: wandb==0.17.8 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (0.17.8)\n", + "Requirement already satisfied: xgboost==2.0.3 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (2.0.3)\n", + "Requirement already satisfied: cehrbert_data==0.0.4 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert==1.3.1) (0.0.4)\n", + "Requirement already satisfied: psutil in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from accelerate==0.31.0->cehrbert==1.3.1) (6.1.0)\n", + "Requirement already satisfied: huggingface-hub in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from accelerate==0.31.0->cehrbert==1.3.1) (0.26.5)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from accelerate==0.31.0->cehrbert==1.3.1) (0.4.5)\n", + "Requirement already satisfied: pyspark==3.1.2 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from cehrbert_data==0.0.4->cehrbert==1.3.1) (3.1.2)\n", + "Requirement already satisfied: click>=8.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from dask==2024.1.1->cehrbert==1.3.1) (8.1.7)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from 
dask==2024.1.1->cehrbert==1.3.1) (3.1.0)\n", + "Requirement already satisfied: fsspec>=2021.09.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from dask==2024.1.1->cehrbert==1.3.1) (2023.10.0)\n", + "Requirement already satisfied: partd>=1.2.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from dask==2024.1.1->cehrbert==1.3.1) (1.4.2)\n", + "Requirement already satisfied: toolz>=0.10.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from dask==2024.1.1->cehrbert==1.3.1) (1.0.0)\n", + "Requirement already satisfied: importlib-metadata>=4.13.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from dask==2024.1.1->cehrbert==1.3.1) (8.5.0)\n", + "Requirement already satisfied: filelock in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from datasets==2.16.1->cehrbert==1.3.1) (3.16.1)\n", + "Requirement already satisfied: pyarrow-hotfix in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from datasets==2.16.1->cehrbert==1.3.1) (0.6)\n", + "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from datasets==2.16.1->cehrbert==1.3.1) (0.3.7)\n", + "Requirement already satisfied: requests>=2.19.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from datasets==2.16.1->cehrbert==1.3.1) (2.32.3)\n", + "Requirement already satisfied: xxhash in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from datasets==2.16.1->cehrbert==1.3.1) (3.5.0)\n", + "Requirement already satisfied: multiprocess in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from datasets==2.16.1->cehrbert==1.3.1) (0.70.15)\n", + "Requirement already satisfied: aiohttp in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from datasets==2.16.1->cehrbert==1.3.1) (3.11.10)\n", + "Requirement already satisfied: 
responses<0.19 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from evaluate==0.4.1->cehrbert==1.3.1) (0.18.0)\n", + "Requirement already satisfied: zstandard>=0.18 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from femr==0.2.0->cehrbert==1.3.1) (0.23.0)\n", + "Requirement already satisfied: icecream==2.1.3 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from femr==0.2.0->cehrbert==1.3.1) (2.1.3)\n", + "Requirement already satisfied: nptyping==2.4.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from femr==0.2.0->cehrbert==1.3.1) (2.4.1)\n", + "Requirement already satisfied: msgpack>=1.0.5 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from femr==0.2.0->cehrbert==1.3.1) (1.1.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from Jinja2==3.1.3->cehrbert==1.3.1) (3.0.2)\n", + "Requirement already satisfied: jsonschema>=4.0.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from meds==0.3.3->cehrbert==1.3.1) (4.23.0)\n", + "Requirement already satisfied: typing-extensions>=4.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from meds==0.3.3->cehrbert==1.3.1) (4.12.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from pandas==2.2.0->cehrbert==1.3.1) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from pandas==2.2.0->cehrbert==1.3.1) (2024.2)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from pydantic==2.6.0->cehrbert==1.3.1) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.16.1 in 
/storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from pydantic==2.6.0->cehrbert==1.3.1) (2.16.1)\n", + "Requirement already satisfied: six>=1.5 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from python-dateutil==2.8.2->cehrbert==1.3.1) (1.17.0)\n", + "Requirement already satisfied: joblib>=1.2.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from scikit-learn==1.4.0->cehrbert==1.3.1) (1.4.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from scikit-learn==1.4.0->cehrbert==1.3.1) (3.5.0)\n", + "Requirement already satisfied: absl-py>=1.0.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (2.1.0)\n", + "Requirement already satisfied: astunparse>=1.6.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (1.6.3)\n", + "Requirement already satisfied: flatbuffers>=23.5.26 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (24.3.25)\n", + "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (0.6.0)\n", + "Requirement already satisfied: google-pasta>=0.1.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (0.2.0)\n", + "Requirement already satisfied: h5py>=2.9.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (3.12.1)\n", + "Requirement already satisfied: libclang>=13.0.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (18.1.1)\n", + "Requirement already satisfied: ml-dtypes~=0.2.0 in 
/storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (0.2.0)\n", + "Requirement already satisfied: opt-einsum>=2.3.2 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (3.4.0)\n", + "Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (4.25.5)\n", + "Requirement already satisfied: setuptools in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (75.1.0)\n", + "Requirement already satisfied: termcolor>=1.1.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (2.5.0)\n", + "Requirement already satisfied: wrapt<1.15,>=1.11.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (1.14.1)\n", + "Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (0.37.1)\n", + "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (1.68.1)\n", + "Requirement already satisfied: tensorboard<2.16,>=2.15 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (2.15.2)\n", + "Requirement already satisfied: tensorflow-estimator<2.16,>=2.15.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow==2.15.0->cehrbert==1.3.1) (2.15.0)\n", + "Requirement already satisfied: keras<2.16,>=2.15.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from 
tensorflow==2.15.0->cehrbert==1.3.1) (2.15.0)\n", + "Requirement already satisfied: promise in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow-datasets==4.5.2->cehrbert==1.3.1) (2.3)\n", + "Requirement already satisfied: tensorflow-metadata in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow-datasets==4.5.2->cehrbert==1.3.1) (1.16.1)\n", + "Requirement already satisfied: sympy in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from torch==2.4.0->cehrbert==1.3.1) (1.13.3)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from torch==2.4.0->cehrbert==1.3.1) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from torch==2.4.0->cehrbert==1.3.1) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from torch==2.4.0->cehrbert==1.3.1) (12.1.105)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from torch==2.4.0->cehrbert==1.3.1) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from torch==2.4.0->cehrbert==1.3.1) (12.1.3.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from torch==2.4.0->cehrbert==1.3.1) (11.0.2.54)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from torch==2.4.0->cehrbert==1.3.1) (10.3.2.106)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in 
/storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from torch==2.4.0->cehrbert==1.3.1) (11.4.5.107)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from torch==2.4.0->cehrbert==1.3.1) (12.1.0.106)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from torch==2.4.0->cehrbert==1.3.1) (2.20.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from torch==2.4.0->cehrbert==1.3.1) (12.1.105)\n", + "Requirement already satisfied: triton==3.0.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from torch==2.4.0->cehrbert==1.3.1) (3.0.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from transformers==4.40.0->cehrbert==1.3.1) (2024.11.6)\n", + "Requirement already satisfied: docker-pycreds>=0.4.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from wandb==0.17.8->cehrbert==1.3.1) (0.4.0)\n", + "Requirement already satisfied: gitpython!=3.1.29,>=1.0.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from wandb==0.17.8->cehrbert==1.3.1) (3.1.43)\n", + "Requirement already satisfied: platformdirs in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from wandb==0.17.8->cehrbert==1.3.1) (4.3.6)\n", + "Requirement already satisfied: sentry-sdk>=1.0.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from wandb==0.17.8->cehrbert==1.3.1) (2.19.2)\n", + "Requirement already satisfied: setproctitle in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from wandb==0.17.8->cehrbert==1.3.1) (1.3.4)\n", + "Requirement already satisfied: colorama>=0.3.9 in 
/storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from icecream==2.1.3->femr==0.2.0->cehrbert==1.3.1) (0.4.6)\n", + "Requirement already satisfied: pygments>=2.2.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from icecream==2.1.3->femr==0.2.0->cehrbert==1.3.1) (2.18.0)\n", + "Requirement already satisfied: executing>=0.3.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from icecream==2.1.3->femr==0.2.0->cehrbert==1.3.1) (2.1.0)\n", + "Requirement already satisfied: asttokens>=2.0.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from icecream==2.1.3->femr==0.2.0->cehrbert==1.3.1) (3.0.0)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch==2.4.0->cehrbert==1.3.1) (12.6.85)\n", + "Requirement already satisfied: py4j==0.10.9 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from pyspark==3.1.2->cehrbert_data==0.0.4->cehrbert==1.3.1) (0.10.9)\n", + "Requirement already satisfied: wheel<1.0,>=0.23.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from astunparse>=1.6.0->tensorflow==2.15.0->cehrbert==1.3.1) (0.44.0)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from aiohttp->datasets==2.16.1->cehrbert==1.3.1) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from aiohttp->datasets==2.16.1->cehrbert==1.3.1) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from aiohttp->datasets==2.16.1->cehrbert==1.3.1) (24.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages 
(from aiohttp->datasets==2.16.1->cehrbert==1.3.1) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from aiohttp->datasets==2.16.1->cehrbert==1.3.1) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from aiohttp->datasets==2.16.1->cehrbert==1.3.1) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from aiohttp->datasets==2.16.1->cehrbert==1.3.1) (1.18.3)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from gitpython!=3.1.29,>=1.0.0->wandb==0.17.8->cehrbert==1.3.1) (4.0.11)\n", + "Requirement already satisfied: zipp>=3.20 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from importlib-metadata>=4.13.0->dask==2024.1.1->cehrbert==1.3.1) (3.21.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from jsonschema>=4.0.0->meds==0.3.3->cehrbert==1.3.1) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from jsonschema>=4.0.0->meds==0.3.3->cehrbert==1.3.1) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from jsonschema>=4.0.0->meds==0.3.3->cehrbert==1.3.1) (0.22.3)\n", + "Requirement already satisfied: locket in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from partd>=1.2.0->dask==2024.1.1->cehrbert==1.3.1) (1.0.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from requests>=2.19.0->datasets==2.16.1->cehrbert==1.3.1) 
(3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from requests>=2.19.0->datasets==2.16.1->cehrbert==1.3.1) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from requests>=2.19.0->datasets==2.16.1->cehrbert==1.3.1) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from requests>=2.19.0->datasets==2.16.1->cehrbert==1.3.1) (2024.12.14)\n", + "Requirement already satisfied: google-auth<3,>=1.6.3 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1) (2.37.0)\n", + "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1) (1.2.1)\n", + "Requirement already satisfied: markdown>=2.6.8 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1) (3.7)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1) (0.7.2)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from sympy->torch==2.4.0->cehrbert==1.3.1) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.56.4 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from tensorflow-metadata->tensorflow-datasets==4.5.2->cehrbert==1.3.1) (1.66.0)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from 
gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb==0.17.8->cehrbert==1.3.1) (5.0.1)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1) (5.5.0)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1) (0.4.1)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1) (4.9)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from google-auth-oauthlib<2,>=0.5->tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1) (2.0.0)\n", + "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1) (0.6.1)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard<2.16,>=2.15->tensorflow==2.15.0->cehrbert==1.3.1) (3.2.2)\n", + "Building wheels for collected packages: cehrbert\n", + " Building wheel for cehrbert (pyproject.toml) ... 
\u001b[?25ldone\n", + "\u001b[?25h Created wheel for cehrbert: filename=cehrbert-1.3.1-py3-none-any.whl size=139740 sha256=b67dbe857d4ecf2d4c1c7b2fad5ece256e911d33abef142535a1210274be4205\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-j1feuvmd/wheels/0f/e9/2b/6ce226fd4943719e4d11e237506934d50e642553b2941d21c5\n", + "Successfully built cehrbert\n", + "Installing collected packages: cehrbert\n", + " Attempting uninstall: cehrbert\n", + " Found existing installation: cehrbert 1.3.1\n", + " Uninstalling cehrbert-1.3.1:\n", + " Successfully uninstalled cehrbert-1.3.1\n", + "Successfully installed cehrbert-1.3.1\n" + ] + } + ], "source": [ - "!mkdir ./content/github_repo;cd ./content/github_repo;git clone https://github.com/cumc-dbmi/cehrbert.git;cd cehrbert;git checkout fix/meds_evaluation;pip install .;" + "!mkdir {ROOT_DIR}/github_repo;cd {ROOT_DIR}/github_repo;git clone https://github.com/cumc-dbmi/cehrbert.git;cd cehrbert;git checkout fix/meds_evaluation;pip install .;" ] }, { @@ -211,15 +777,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "7f033a64", "metadata": {}, "outputs": [], "source": [ - "cehrbert_pretrain_config = \"\"\"\n", + "cehrbert_pretrain_config = f\"\"\"\n", "#Model arguments\n", - "model_name_or_path: \"./content/output/cehrbert/\"\n", - "tokenizer_name_or_path: \"./content/output/cehrbert/\"\n", + "model_name_or_path: \"{ROOT_DIR}/output/cehrbert/\"\n", + "tokenizer_name_or_path: \"{ROOT_DIR}/output/cehrbert/\"\n", "num_hidden_layers: 6\n", "max_position_embeddings: 1024\n", "hidden_size: 768\n", @@ -228,8 +794,8 @@ "include_value_prediction: false # additional CEHR-BERT learning objective\n", "\n", "#Data arguments\n", - "data_folder: \"./content/meds_reader/\"\n", - "dataset_prepared_path: \"./content/output/cehrbert_dataset_prepared/\"\n", + "data_folder: \"{ROOT_DIR}/meds_reader/\"\n", + "dataset_prepared_path: \"{ROOT_DIR}/output/cehrbert_dataset_prepared/\"\n", "\n", "# Below is a list of 
Med-to-CehrBert related arguments\n", "preprocessing_num_workers: 2\n", @@ -252,7 +818,7 @@ "resume_from_checkpoint: # automatically infer the latest checkpoint from the output folder\n", "seed: 42\n", "\n", - "output_dir: \"./content/output/cehrbert/\"\n", + "output_dir: \"{ROOT_DIR}/output/cehrbert/\"\n", "evaluation_strategy: \"epoch\"\n", "save_strategy: \"epoch\"\n", "eval_accumulation_steps: 10\n", @@ -281,24 +847,542 @@ ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 19, "id": "20844f8a", "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-12-15 03:56:01.498490: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-12-15 03:56:01.498535: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-12-15 03:56:01.499647: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-12-15 03:56:01.506092: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2024-12-15 03:56:02.465497: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "Directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/cehrbert_dataset_prepared/meds_reader_meds_extension not found\n", + "Traceback (most recent call last):\n", + " File 
\"/storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages/cehrbert/runners/hf_cehrbert_pretrain_runner.py\", line 183, in main\n", + " dataset = load_from_disk(meds_extension_path)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/storage/nassim/miniconda3/envs/cehrbert/lib/python3.11/site-packages/datasets/load.py\", line 2630, in load_from_disk\n", + " raise FileNotFoundError(f\"Directory {dataset_path} not found\")\n", + "FileNotFoundError: Directory /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/cehrbert_dataset_prepared/meds_reader_meds_extension not found\n", + "Generating train split: 50 examples [00:03, 14.66 examples/s]\n", + "Map (num_proc=2): 100%|██████████████████| 50/50 [00:05<00:00, 9.41 examples/s]\n", + "Generating train split: 25 examples [00:01, 13.20 examples/s]\n", + "Map (num_proc=2): 100%|██████████████████| 25/25 [00:02<00:00, 8.92 examples/s]\n", + "Generating train split: 25 examples [00:00, 29.81 examples/s]\n", + "Map (num_proc=2): 100%|██████████████████| 25/25 [00:01<00:00, 19.80 examples/s]\n", + "Saving the dataset (1/1 shards): 100%|██| 50/50 [00:00<00:00, 277.76 examples/s]\n", + "Saving the dataset (1/1 shards): 100%|██| 25/25 [00:00<00:00, 284.20 examples/s]\n", + "Saving the dataset (1/1 shards): 100%|██| 25/25 [00:00<00:00, 619.16 examples/s]\n", + "Failed to load the tokenizer from /storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/cehrbert/ with the error \n", + "/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/cehrbert/ does not appear to have a file named tokenizer.json. 
Checkout 'https://huggingface.co//storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/cehrbert//tree/None' for available files.\n", + "Tried to create the tokenizer, however the dataset is not provided.\n", + "Map (num_proc=2): 100%|█████████████████| 50/50 [00:00<00:00, 104.75 examples/s]\n", + "Map (num_proc=2): 100%|██████████████████| 50/50 [00:01<00:00, 38.83 examples/s] 0\n", + "Aggregating the lab statistics: 100%|█████████████| 2/2 [00:00<00:00, 7.44it/s]\n", + "Filter (num_proc=2): 100%|██████████████| 50/50 [00:00<00:00, 319.20 examples/s]\n", + "Filter (num_proc=2): 100%|██████████████| 25/25 [00:00<00:00, 168.34 examples/s]\n", + "Filter (num_proc=2): 100%|██████████████| 25/25 [00:00<00:00, 162.55 examples/s]\n", + "Map (num_proc=2): 100%|██████████████████| 50/50 [00:04<00:00, 11.91 examples/s]\n", + "Map (num_proc=2): 100%|██████████████████| 25/25 [00:02<00:00, 11.84 examples/s]\n", + "Map (num_proc=2): 100%|██████████████████| 25/25 [00:01<00:00, 20.68 examples/s]\n", + "Saving the dataset (1/1 shards): 100%|██| 50/50 [00:00<00:00, 314.80 examples/s]\n", + "Saving the dataset (1/1 shards): 100%|██| 25/25 [00:00<00:00, 355.17 examples/s]\n", + "Saving the dataset (1/1 shards): 100%|██| 25/25 [00:00<00:00, 774.69 examples/s]\n", + "Filter (num_proc=2): 100%|██████████████| 50/50 [00:00<00:00, 319.76 examples/s]\n", + "Filter (num_proc=2): 100%|██████████████| 25/25 [00:00<00:00, 162.83 examples/s]\n", + "Filter (num_proc=2): 100%|██████████████| 25/25 [00:00<00:00, 162.27 examples/s]\n", + "/storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/cehrbert/ does not appear to have a file named config.json. Checkout 'https://huggingface.co//storage/nassim/projects/MEDS-DEV/demo/work_dir/mimiciv_demo//output/cehrbert//tree/None' for available files.\n", + " 0%| | 0/50 [00:00