Fix API

h2oai · Oct 30, 2023 · 1662f5d · 1662f5d
1 parent cd38506
commit 1662f5d
Show file tree

Hide file tree

Showing 5 changed files with 109 additions and 7 deletions.
diff --git a/h2o-py/h2o/model/metrics/binomial.py b/h2o-py/h2o/model/metrics/binomial.py
@@ -952,3 +952,27 @@ def gains_lift_plot(self, type="both", server=False, save_plot_path=None, plot=T
         else:
             return decorate_plot_result(res=gl)
 
+    def thresholds_and_metric_scores(self):
+        """Retrieve the thresholds and metric scores table.
+
+        :returns: the ``thresholds_and_metric_scores`` entry of the metric JSON, or ``None`` if it is missing.
+
+        :examples:
+
+        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
+        >>> local_data = [[1, 'a'],[1, 'a'],[1, 'a'],[1, 'a'],[1, 'a'],
+        ...               [1, 'a'],[1, 'a'],[1, 'a'],[1, 'a'],[1, 'a'],
+        ...               [0, 'b'],[0, 'b'],[0, 'b'],[0, 'b'],[0, 'b'],
+        ...               [0, 'b'],[0, 'b'],[0, 'b'],[0, 'b'],[0, 'b']]
+        >>> h2o_data = h2o.H2OFrame(local_data)
+        >>> h2o_data.set_names(['response', 'predictor'])
+        >>> h2o_data["response"] = h2o_data["response"].asfactor()
+        >>> gbm = H2OGradientBoostingEstimator(ntrees=1,
+        ...                                    distribution="bernoulli")
+        >>> gbm.train(x=list(range(1,h2o_data.ncol)),
+        ...           y="response",
+        ...           training_frame=h2o_data)
+        >>> perf = gbm.model_performance()
+        >>> perf.thresholds_and_metric_scores()
+        """
+        return self._metric_json.get('thresholds_and_metric_scores')
diff --git a/h2o-py/h2o/model/models/binomial.py b/h2o-py/h2o/model/models/binomial.py
@@ -10,6 +10,43 @@
 
 class H2OBinomialModel(ModelBase):
 
+    def thresholds_and_metric_scores(self, train=False, valid=False, xval=False):
+        """Return the table of all thresholds and their metric scores.
+
+        With every flag left at ``False`` (the default), the training metric table is returned.
+        When more than one flag is ``True``, the result is a dictionary of tables whose
+        keys are "train", "valid", and "xval".
+
+        :param bool train: If ``True``, include the thresholds and metric scores table for the training data.
+        :param bool valid: If ``True``, include the thresholds and metric scores table for the validation data.
+        :param bool xval: If ``True``, include the thresholds and metric scores table for the cross-validated splits.
+
+        :returns: The thresholds and metric scores table(s) for the specified key(s).
+
+        :examples:
+
+        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
+        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
+        >>> r = cars[0].runif()
+        >>> train = cars[r > .2]
+        >>> valid = cars[r <= .2]
+        >>> response_col = "economy_20mpg"
+        >>> distribution = "bernoulli"
+        >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
+        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
+        >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
+        ...                                    distribution=distribution,
+        ...                                    fold_assignment="Random")
+        >>> gbm.train(y=response_col,
+        ...           x=predictors,
+        ...           validation_frame=valid,
+        ...           training_frame=train)
+        >>> gbm.thresholds_and_metric_scores()  # <- Default: return training metric table
+        >>> gbm.thresholds_and_metric_scores(train=True, valid=True, xval=True)
+        """
+        requested = dict(train=train, valid=valid, xval=xval)
+        return self._delegate_to_metrics('thresholds_and_metric_scores', **requested)
+
     def F1(self, thresholds=None, train=False, valid=False, xval=False):
         """
         Get the F1 value for a set of thresholds.

diff --git a/h2o-py/tests/testdir_algos/gbm/pyunit_gbm_cars_thresholds.py b/h2o-py/tests/testdir_algos/gbm/pyunit_gbm_cars_thresholds.py
@@ -0,0 +1,26 @@
+from builtins import range
+import sys
+sys.path.insert(1,"../../../")
+import h2o
+from tests import pyunit_utils
+from h2o.estimators.gbm import H2OGradientBoostingEstimator
+
+
+def thresholds_gbm():
+    """Check that a GBM's thresholds table equals the one from its training performance."""
+    frame = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))
+    frame[1] = frame[1].asfactor()
+    frame.summary()
+
+    model = H2OGradientBoostingEstimator(nfolds=5, distribution="bernoulli")
+    model.train(x=list(range(2, 9)), y=1, training_frame=frame)
+    model.show()
+    from_model = model.thresholds_and_metric_scores()
+    from_perf = model.model_performance(train=True).thresholds_and_metric_scores()
+    pyunit_utils.assert_H2OTwoDimTable_equal_upto(from_model, from_perf)
+
+
+if __name__ == "__main__":
+    pyunit_utils.standalone_test(thresholds_gbm)  # run as a script: hand the test to standalone_test
+else:
+    thresholds_gbm()  # imported as a module: run the test immediately on import
diff --git a/h2o-r/h2o-package/R/models.R b/h2o-r/h2o-package/R/models.R
@@ -1667,12 +1667,15 @@ h2o.auuc_table <- function(object, train=FALSE, valid=FALSE) {
 
 #' Retrieve the thresholds and metric scores table
 #'
-#' Retrieves the thresholds and metric scores table from an \linkS4class{H2OBinomialUpliftMetrics}.
-#' The table contains indices, thresholds, all cumulative uplift values and cumulative number of observations.
+#' Retrieves the thresholds and metric scores table from a \linkS4class{H2OBinomialUpliftMetrics} 
+#' or a \linkS4class{H2OBinomialMetrics}.
+#' 
+#' The table contains indices, thresholds, all cumulative uplift values and cumulative number of observations for 
+#' uplift binomial models or thresholds and maximal metric values for binomial models. 
 #' If "train" and "valid" parameters are FALSE (default), then the training table is returned. If more
 #' than one parameter is set to TRUE, then a named vector of tables is returned, where the names are "train", "valid".
 #'
-#' @param object An \linkS4class{H2OBinomialUpliftMetrics}
+#' @param object A \linkS4class{H2OBinomialUpliftMetrics} or a \linkS4class{H2OBinomialMetrics}
 #' @param train Retrieve the training thresholds and metric scores table
 #' @param valid Retrieve the validation thresholds and metric scores table
+#' @param xval Retrieve the cross-validation thresholds and metric scores table
 #' @examples
@@ -1691,11 +1694,11 @@ h2o.auuc_table <- function(object, train=FALSE, valid=FALSE) {
 #' h2o.thresholds_and_metric_scores(perf)
 #' }
 #' @export
-h2o.thresholds_and_metric_scores <- function(object, train=FALSE, valid=FALSE) {
+h2o.thresholds_and_metric_scores <- function(object, train=FALSE, valid=FALSE, xval=FALSE) {
     if( is(object, "H2OModelMetrics") ) return( object@metrics$thresholds_and_metric_score)
     if( is(object, "H2OModel") ) {
         model.parts <- .model.parts(object)
-        if ( !train && !valid ) {
+        if ( !train && !valid && !xval) {
             metric <- model.parts$tm@metrics$thresholds_and_metric_score
             if ( !is.null(metric) ) return(metric)
         }
@@ -1712,6 +1715,13 @@ h2o.thresholds_and_metric_scores <- function(object, train=FALSE, valid=FALSE) {
                 v_names <- c(v_names,"valid")
             }
         }
+        if ( xval ) {
+            if( is.null(model.parts$xval) ) return(invisible(.warn.no.cross.validation()))
+            else {
+                v <- c(v,model.parts$xm@metrics$thresholds_and_metric_score)
+                v_names <- c(v_names,"xval")
+            }
+        }
         if ( !is.null(v) ) {
             names(v) <- v_names
             if ( length(v)==1 ) { return( v[[1]] ) } else { return( v ) }

diff --git a/h2o-r/tests/testdir_algos/gbm/runit_GBM_bernoulli.R b/h2o-r/tests/testdir_algos/gbm/runit_GBM_bernoulli.R
@@ -44,17 +44,22 @@ test.GBM.bernoulli <- function() {
   Log.info("R Confusion Matrix:")
   print(RCM)
   Log.info("H2O Confusion Matrix:")
-  print(h2o.confusionMatrix(h2o.performance(prostate.h2o)))
+  perf <- h2o.performance(prostate.h2o)  
+  print(h2o.confusionMatrix(perf))
 
   R.auc <- gbm.roc.area(prostate.data$CAPSULE,R.preds)
   Log.info(paste("R AUC:", R.auc, "\tH2O AUC:", h2o.auc(h2o.performance(prostate.h2o))))
 
   # PUBDEV-515
-  f0 = log(mean(prostate.data$CAPSULE)/(1-mean(prostate.data$CAPSULE)))
+  f0 <- log(mean(prostate.data$CAPSULE)/(1-mean(prostate.data$CAPSULE)))
   print(f0)
   print(prostate.h2o@model$init_f)
   expect_equal(prostate.h2o@model$init_f, f0, tolerance=1e-4) ## check the intercept term
 
+  # GH-15889
+  ths_model <- h2o.thresholds_and_metric_scores(prostate.h2o)
+  ths_perf <- h2o.thresholds_and_metric_scores(perf)
+  expect_equal(ths_model, ths_perf)
 }
 
 doTest("GBM Test: prostate.csv with Bernoulli distribution", test.GBM.bernoulli)