diff --git a/src/delphi/eval/utils.py b/src/delphi/eval/utils.py
new file mode 100644
index 00000000..1ad7c256
--- /dev/null
+++ b/src/delphi/eval/utils.py
@@ -0,0 +1,27 @@
+from collections.abc import Callable
+
+import torch
+from jaxtyping import Float, Int
+
+
+def get_all_logprobs(
+    model: Callable, input_ids: Int[torch.Tensor, "batch seq"]
+) -> Float[torch.Tensor, "batch seq vocab"]:
+    # batch, seq, vocab
+    logits = model(input_ids).logits
+    return torch.log_softmax(logits, dim=-1)
+
+
+def gather_logprobs(
+    logprobs: Float[torch.Tensor, "batch seq vocab"],
+    tokens: Int[torch.Tensor, "batch seq"],
+) -> Float[torch.Tensor, "batch seq"]:
+    return torch.gather(logprobs, -1, tokens.unsqueeze(-1)).squeeze(-1)
+
+
+def get_next_logprobs(
+    model: Callable, input_ids: Int[torch.Tensor, "batch seq"]
+) -> Float[torch.Tensor, "batch shorter_seq"]:
+    logprobs = get_all_logprobs(model, input_ids[:, :-1])
+    next_tokens = input_ids[:, 1:]
+    return gather_logprobs(logprobs, next_tokens)
diff --git a/tests/eval/test_utils.py b/tests/eval/test_utils.py
new file mode 100644
index 00000000..cefae455
--- /dev/null
+++ b/tests/eval/test_utils.py
@@ -0,0 +1,43 @@
+import torch
+
+from delphi.eval.utils import gather_logprobs
+
+
+def test_gather_logprobs():
+    # vocab size = 3
+    logprobs = torch.tensor(
+        [
+            # batch 0
+            [
+                # seq 0
+                [0.00, 0.01, 0.02],
+                # seq 1
+                [0.10, 0.11, 0.12],
+            ],
+            # batch 1
+            [
+                # seq 0
+                [1.00, 1.01, 1.02],
+                # seq 1
+                [1.10, 1.11, 1.12],
+            ],
+        ]
+    )
+    tokens = torch.tensor(
+        [
+            # batch 0
+            [0, 2],
+            # batch 1
+            [1, 2],
+        ]
+    )
+    expected_output = torch.tensor(
+        [
+            # batch 0
+            [0.00, 0.12],
+            # batch 1
+            [1.01, 1.12],
+        ]
+    )
+    result = gather_logprobs(logprobs, tokens)
+    assert torch.allclose(result, expected_output)