Quantco · kklein · Aug 1, 2022 · Jul 28, 2022 · Jul 28, 2022 · Jul 28, 2022
diff --git a/src/datajudge/db_access.py b/src/datajudge/db_access.py
@@ -288,7 +288,13 @@ def get_column(self, engine):
                 f"Trying to access column of DataReference "
                 f"{self.get_string()} yet none is given."
             )
-        return self.get_columns(engine)[0]
+        columns = self.get_columns(engine)
+        if len(columns) > 1:
+            raise ValueError(
+                "DataReference was expected to only have a single column but had multiple: "
+                f"{columns}"
+            )
+        return columns[0]
 
     def get_columns(self, engine):
         """Fetch all relevant columns of a DataReference."""
@@ -905,6 +911,12 @@ def get_column_array_agg(
 
 
 def _cdf_selection(engine, ref: DataReference, cdf_label: str, value_label: str):
+    """Create empirical cumulative distribution function values.
+
+    Concretely, create a selection with values from ``value_label`` as well as
+    the empirical cumulative distribution function values, labeled as
+    ``cdf_label``.
+    """
     col = ref.get_column(engine)
     selection = ref.get_selection(engine).subquery()
 
@@ -927,7 +939,6 @@ def _cdf_selection(engine, ref: DataReference, cdf_label: str, value_label: str)
         .group_by(cdf_selection.c[value_label])
         .subquery()
     )
-
     return grouped_cdf_selection
 
 
@@ -1026,7 +1037,10 @@ def get_ks_2sample(
     ref2: DataReference,
 ):
     """
-    Runs the query for the two-sample Kolmogorov-Smirnov test and returns the test statistic d.
+    Run the query for the two-sample Kolmogorov-Smirnov test and return the test statistic d.
+
+    For a raw-sql version of this query, please see this PR:
+    https://github.com/Quantco/datajudge/pull/28/
     """
     cdf_label = "cdf"
     value_label = "val"