diff --git a/.github/workflows/code-quality-main.yaml b/.github/workflows/code-quality-main.yaml
index ba2caf4..874da12 100644
--- a/.github/workflows/code-quality-main.yaml
+++ b/.github/workflows/code-quality-main.yaml
@@ -13,12 +13,16 @@ jobs:
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

-      - name: Set up Python 3.10
-        uses: actions/setup-python@v3
+      - name: Set up Python
+        uses: actions/setup-python@v5
         with:
           python-version: "3.10"

+      - name: Install packages
+        run: |
+          pip install .[dev]
+
       - name: Run pre-commits
         uses: pre-commit/action@v3.0.1
diff --git a/.github/workflows/code-quality-pr.yaml b/.github/workflows/code-quality-pr.yaml
index 9a33678..bee2e11 100644
--- a/.github/workflows/code-quality-pr.yaml
+++ b/.github/workflows/code-quality-pr.yaml
@@ -16,13 +16,17 @@ jobs:
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

       - name: Set up Python 3.10
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v5
         with:
           python-version: "3.10"

+      - name: Install packages
+        run: |
+          pip install .[dev]
+
       - name: Find modified files
         id: file_changes
         uses: trilom/file-changes-action@v1.2.4
diff --git a/.github/workflows/python-build.yaml b/.github/workflows/python-build.yaml
index 3f3c96d..b22ff87 100644
--- a/.github/workflows/python-build.yaml
+++ b/.github/workflows/python-build.yaml
@@ -10,7 +10,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: "3.10"
       - name: Install pypa/build
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 3faf789..31a46da 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -17,10 +17,10 @@ jobs:
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

       - name: Set up Python 3.10
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v5
         with:
           python-version: "3.10"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5c5591c..38d66f1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,7 +5,7 @@ exclude: "docs/index.md"
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v5.0.0
     hooks:
       # list of supported hooks: https://pre-commit.com/hooks.html
       - id: trailing-whitespace
diff --git a/README.md b/README.md
index c7460b6..0d9c209 100644
--- a/README.md
+++ b/README.md
@@ -4,10 +4,129 @@ This repository contains the dataset, task, model training recipes, and results
 effort for EHR machine learning.

 Note that this repository is _not_ a place where functional code is stored. Rather, this repository stores
-configuration files, training recipes, results, etc. for the MEDS-DEV benchmarking effort -- runnable code will
+configuration files, training recipes, results, etc. for the MEDS-DEV benchmarking effort -- runnable code
+will
 often come from other repositories, with suitable permalinks being present in the various configuration
 files or commit messages for associated contributions to this repository.

+## Example workflow
+
+### (Optional) Set up the MEDS project with environment
+
+```bash
+# Create and enter a MEDS project directory
+mkdir $MY_MEDS_PROJECT_ROOT
+cd $MY_MEDS_PROJECT_ROOT
+
+conda create -n $MY_MEDS_CONDA_ENV python=3.10
+conda activate $MY_MEDS_CONDA_ENV
+```
+
+Additionally install any model-related dependencies.
+
+### Install MEDS-DEV
+
+Clone the MEDS-DEV GitHub repo and install it locally.
+This will additionally install some MEDS data processing dependencies:
+
+```bash
+git clone https://github.com/mmcdermott/MEDS-DEV.git
+cd ./MEDS-DEV
+pip install -e .
+```
+
+Install the MEDS evaluation package:
+
+```bash
+git clone https://github.com/kamilest/meds-evaluation.git
+pip install -e ./meds-evaluation
+```
+
+Additionally, make sure any model-related dependencies are installed.
+
+### Extract a task from the MEDS dataset
+
+This step prepares the MEDS dataset for a task by extracting a cohort using inclusion/exclusion criteria and
+processing the data to create the label files.
+
+### Find the task configuration file
+
+Task-related information is stored in Hydra configuration files (in `.yaml` format) under
+`MEDS-DEV/src/MEDS_DEV/tasks/criteria`.
+
+Task names correspond to the path to their configuration file, starting from the
+`MEDS-DEV/src/MEDS_DEV/tasks/criteria` directory. For example, the file
+`MEDS-DEV/src/MEDS_DEV/tasks/criteria/mortality/in_icu/first_24h.yaml` corresponds to a `$TASK_NAME` of
+`mortality/in_icu/first_24h`.
+
+**To add a task**
+
+If your task is not supported, you will need to add a directory and define an appropriate configuration
+file in the corresponding location.
+
+### Dataset configuration file
+
+Task configuration files are incomplete, because some concepts (predicates) have to be defined in a
+dataset-specific way (e.g. `icu_admission` in `mortality/in_icu/first_24h`).
+
+These dataset-specific predicate definitions are found in
+`MEDS-DEV/src/MEDS_DEV/datasets/$DATASET_NAME/predicates.yaml` Hydra configuration files.
+
+In addition to `$DATASET_NAME` (e.g. `MIMIC-IV`), you will also need to have your MEDS dataset directory
+ready (i.e. `$MEDS_ROOT_DIR`).
+
+**To add a dataset configuration file**
+
+If your dataset is not supported, you will need to add a directory and define an appropriate configuration
+file in the corresponding location.
+
+### Run the MEDS task extraction helper
+
+From your project directory (`$MY_MEDS_PROJECT_ROOT`) where `MEDS-DEV` is located, run
+
+```bash
+./MEDS-DEV/src/MEDS_DEV/helpers/extract_task.sh $MEDS_ROOT_DIR $DATASET_NAME $TASK_NAME
+```
+
+This will use information from task and dataset-specific predicate configs to extract cohorts and labels from
+`$MEDS_ROOT_DIR/data`, and place them in `$MEDS_ROOT_DIR/task_labels/$TASK_NAME/` subdirectories, retaining
+the same sharded structure as the `$MEDS_ROOT_DIR/data` directory.
+
+### Train the model
+
+This step depends on the API of your particular model.
+
+For example, the command below calls a helper script that generates random outputs for binary
+classification, conforming to the MEDS binary classification prediction schema:
+
+```bash
+./MEDS-DEV/src/MEDS_DEV/helpers/generate_predictions.sh $MEDS_ROOT_DIR $DATASET_NAME $TASK_NAME
+```
+
+### Evaluate the model
+
+You can use the `meds-evaluation` package by running `meds-evaluation-cli` and providing the path to the
+predictions dataframe as well as the output directory. For example,
+
+```bash
+meds-evaluation-cli \
+    predictions_path="./<$MEDS_ROOT_DIR>/task_predictions/$TASK_NAME//*.parquet" \
+    output_dir="./<$MEDS_ROOT_DIR>/task_evaluation/$TASK_NAME//..."
+```
+
+This will create a JSON file with the results in the directory provided by the `output_dir` argument.
+
+Note that this package currently supports binary classification only.
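+
+For illustration only (this sketch is not part of the `meds-evaluation` package or of the MEDS-DEV helpers),
+a predictions dataframe conforming to the binary classification schema could be assembled with `polars`. The
+column names match those used by the `generate_random_predictions` helper in this repository; the label rows,
+probabilities, and output file name below are placeholders:
+
+```python
+import polars as pl
+
+# A tiny stand-in for one label shard produced by the task extraction step.
+labels = pl.DataFrame(
+    {"subject_id": [1, 2, 3], "prediction_time": [0, 1, 2], "boolean_value": [True, False, True]}
+)
+
+# `scores` stands in for whatever per-row probabilities your model produces.
+scores = [0.9, 0.2, 0.6]
+predictions = labels.with_columns(
+    pl.Series("predicted_boolean_value", [s > 0.5 for s in scores]),
+    pl.Series("predicted_boolean_probability", scores),
+)
+
+# Write the file under $MEDS_ROOT_DIR/task_predictions/$TASK_NAME/, mirroring the label sharding.
+predictions.write_parquet("predictions_shard.parquet")
+```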
+
 ## Contributing to MEDS-DEV

 ### To Add a Model
diff --git a/pyproject.toml b/pyproject.toml
index cc51b8b..649eef6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,7 +33,7 @@ dependencies = ["meds==0.3.3", "es-aces==0.5.0"]
 [tool.setuptools_scm]

 [project.optional-dependencies]
-dev = ["pre-commit"]
+dev = ["pre-commit<4"]
 tests = ["pytest", "pytest-cov", "rootutils"]
 docs = [
   "mkdocs==1.6.0", "mkdocs-material==9.5.31", "mkdocstrings[python,shell]==0.25.2", "mkdocs-gen-files==0.5.0",
diff --git a/src/MEDS_DEV/configs/predictions.yaml b/src/MEDS_DEV/configs/predictions.yaml
new file mode 100644
index 0000000..736ae72
--- /dev/null
+++ b/src/MEDS_DEV/configs/predictions.yaml
@@ -0,0 +1,6 @@
+defaults:
+  - _ACES_MD
+  - _self_
+  - override hydra/hydra_logging: disabled
+
+cohort_predictions_dir: "${oc.env:MEDS_ROOT_DIR}/task_predictions"
diff --git a/src/MEDS_DEV/helpers/generate_predictions.sh b/src/MEDS_DEV/helpers/generate_predictions.sh
new file mode 100755
index 0000000..606d0f4
--- /dev/null
+++ b/src/MEDS_DEV/helpers/generate_predictions.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+export MEDS_ROOT_DIR=$1
+export MEDS_DATASET_NAME=$2
+export MEDS_TASK_NAME=$3
+
+shift 3
+
+MEDS_DEV_REPO_DIR=$(python -c "from importlib.resources import files; print(files(\"MEDS_DEV\"))")
+export MEDS_DEV_REPO_DIR
+
+# TODO improve efficiency of prediction generator by using this
+# SHARDS=$(expand_shards "$MEDS_ROOT_DIR"/data)
+
+python -m MEDS_DEV.helpers.generate_random_predictions --config-path="$MEDS_DEV_REPO_DIR"/configs \
+  --config-name="predictions" "hydra.searchpath=[pkg://aces.configs]" "$@"
diff --git a/src/MEDS_DEV/helpers/generate_random_predictions.py b/src/MEDS_DEV/helpers/generate_random_predictions.py
new file mode 100644
index 0000000..5502438
--- /dev/null
+++ b/src/MEDS_DEV/helpers/generate_random_predictions.py
@@ -0,0 +1,94 @@
+import os
+from importlib.resources import files
+from pathlib import Path
+
+import hydra
+import numpy as np
+import polars as pl
+from omegaconf import DictConfig
+
+SUBJECT_ID = "subject_id"
+PREDICTION_TIME = "prediction_time"
+
+BOOLEAN_VALUE_COLUMN = "boolean_value"
+PREDICTED_BOOLEAN_VALUE_COLUMN = "predicted_boolean_value"
+PREDICTED_BOOLEAN_PROBABILITY_COLUMN = "predicted_boolean_probability"
+
+CONFIG = files("MEDS_DEV").joinpath("configs/predictions.yaml")
+
+
+@hydra.main(version_base=None, config_path=str(CONFIG.parent.resolve()), config_name=CONFIG.stem)
+def generate_random_predictions(cfg: DictConfig) -> None:
+    cohort_dir = cfg.cohort_dir  # cohort_dir: "${oc.env:MEDS_ROOT_DIR}/task_labels"
+    cohort_name = cfg.cohort_name  # cohort_name: ${task_name}; task_name: ${oc.env:MEDS_TASK_NAME}
+
+    cohort_dir = Path(cohort_dir) / cohort_name
+    cohort_predictions_dir = (
+        cfg.cohort_predictions_dir
+    )  # cohort_predictions_dir: "${oc.env:MEDS_ROOT_DIR}/task_predictions"
+
+    # TODO: use expand_shards helper from the script to access sharded dataframes directly
+    for split in cohort_dir.iterdir():
+        if split.is_dir() and split.name in {"train", "tuning", "held_out"}:  # train | tuning | held_out
+            for file in split.iterdir():
+                if file.is_file():
+                    dataframe = pl.read_parquet(file)
+                    predictions = _generate_random_predictions(dataframe)  # sharded dataframes
+
+                    # $MEDS_ROOT_DIR/task_predictions/$TASK_NAME/<split>/<shard>.parquet
+                    predictions_path = Path(cohort_predictions_dir) / cohort_name / split.name
+                    os.makedirs(predictions_path, exist_ok=True)
+
+                    predictions.write_parquet(predictions_path / file.name)
+        elif split.is_file():
+            dataframe = pl.read_parquet(split)
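+            # Labels stored as a single unsharded parquet file: predictions are written directly
+            # under $MEDS_ROOT_DIR/task_predictions/$TASK_NAME/ with the same file name.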
+            predictions = _generate_random_predictions(dataframe)
+
+            predictions_path = Path(cohort_predictions_dir) / cohort_name
+            os.makedirs(predictions_path, exist_ok=True)
+
+            predictions.write_parquet(predictions_path / split.name)
+
+
+def _generate_random_predictions(dataframe: pl.DataFrame, seed: int = 1) -> pl.DataFrame:
+    """Augments the input dataframe with random predictions.
+
+    Args:
+        dataframe: Input dataframe with at least the columns: [subject_id, prediction_time, boolean_value]
+        seed: Seed for the random number generator.
+
+    Returns:
+        An augmented dataframe with the boolean value and probability columns.
+
+    Example:
+        >>> df = pl.DataFrame({
+        ...     "subject_id": [1, 2, 3],
+        ...     "prediction_time": [0, 1, 2],
+        ...     "boolean_value": [True, False, True]
+        ... })
+        >>> _generate_random_predictions(df).drop(["prediction_time", "boolean_value"])
+        shape: (3, 3)
+        ┌────────────┬─────────────────────────┬───────────────────────────────┐
+        │ subject_id ┆ predicted_boolean_value ┆ predicted_boolean_probability │
+        │ ---        ┆ ---                     ┆ ---                           │
+        │ i64        ┆ bool                    ┆ f64                           │
+        ╞════════════╪═════════════════════════╪═══════════════════════════════╡
+        │ 1          ┆ true                    ┆ 0.511822                      │
+        │ 2          ┆ true                    ┆ 0.950464                      │
+        │ 3          ┆ false                   ┆ 0.14416                       │
+        └────────────┴─────────────────────────┴───────────────────────────────┘
+    """
+
+    output = dataframe.select([SUBJECT_ID, PREDICTION_TIME, BOOLEAN_VALUE_COLUMN])
+    rng = np.random.default_rng(seed)
+    probabilities = rng.uniform(0, 1, len(dataframe))
+    # TODO: meds-evaluation currently cares about the order of columns and types, so the new columns have to
+    # be inserted at the correct position and cast to the correct type
+    output.insert_column(3, pl.Series(PREDICTED_BOOLEAN_VALUE_COLUMN, probabilities.round()).cast(pl.Boolean))
+    output.insert_column(4, pl.Series(PREDICTED_BOOLEAN_PROBABILITY_COLUMN, probabilities))
+
+    return output
+
+
+if __name__ == "__main__":
+    generate_random_predictions()
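+
+# Usage note (illustrative, not part of the module's API): this script is normally invoked through
+# helpers/generate_predictions.sh, which exports MEDS_ROOT_DIR, MEDS_DATASET_NAME, and MEDS_TASK_NAME and
+# passes "hydra.searchpath=[pkg://aces.configs]" so that the _ACES_MD entry in configs/predictions.yaml
+# can be found.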