Skip to content

Commit

Permalink
Fix API
Browse files Browse the repository at this point in the history
  • Loading branch information
maurever committed Oct 30, 2023
1 parent cd38506 commit 1662f5d
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 7 deletions.
24 changes: 24 additions & 0 deletions h2o-py/h2o/model/metrics/binomial.py
Original file line number Diff line number Diff line change
Expand Up @@ -952,3 +952,27 @@ def gains_lift_plot(self, type="both", server=False, save_plot_path=None, plot=T
else:
return decorate_plot_result(res=gl)

def thresholds_and_metric_scores(self):
    """
    Retrieve the thresholds and metric scores table.

    :returns: the ``thresholds_and_metric_scores`` entry of the metric JSON, or ``None`` when the
        metrics do not contain such an entry.

    :examples:

    >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
    >>> local_data = [[1, 'a'],[1, 'a'],[1, 'a'],[1, 'a'],[1, 'a'],
    ...               [1, 'a'],[1, 'a'],[1, 'a'],[1, 'a'],[1, 'a'],
    ...               [0, 'b'],[0, 'b'],[0, 'b'],[0, 'b'],[0, 'b'],
    ...               [0, 'b'],[0, 'b'],[0, 'b'],[0, 'b'],[0, 'b']]
    >>> h2o_data = h2o.H2OFrame(local_data)
    >>> h2o_data.set_names(['response', 'predictor'])
    >>> h2o_data["response"] = h2o_data["response"].asfactor()
    >>> gbm = H2OGradientBoostingEstimator(ntrees=1,
    ...                                    distribution="bernoulli")
    >>> gbm.train(x=list(range(1,h2o_data.ncol)),
    ...           y="response",
    ...           training_frame=h2o_data)
    >>> perf = gbm.model_performance()
    >>> perf.thresholds_and_metric_scores()
    """
    metric_json = self._metric_json
    # Not every metrics payload carries the table; report its absence as None instead of raising.
    if 'thresholds_and_metric_scores' in metric_json:
        return metric_json['thresholds_and_metric_scores']
    return None
37 changes: 37 additions & 0 deletions h2o-py/h2o/model/models/binomial.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,43 @@

class H2OBinomialModel(ModelBase):

def thresholds_and_metric_scores(self, train=False, valid=False, xval=False):
    """
    Get all thresholds and metric scores in a table.

    If all are ``False`` (default), then return the training metric table.
    If more than one option is set to ``True``, then return a dictionary of tables where
    the keys are "train", "valid", and "xval".

    :param bool train: If ``True``, return the thresholds and metric scores table for the training data.
    :param bool valid: If ``True``, return the thresholds and metric scores table for the validation data.
    :param bool xval: If ``True``, return the thresholds and metric scores table for each of the
        cross-validated splits.

    :returns: The thresholds and metric scores tables for the specified key(s).

    :examples:

    >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
    >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
    >>> r = cars[0].runif()
    >>> train = cars[r > .2]
    >>> valid = cars[r <=.2]
    >>> response_col = "economy_20mpg"
    >>> distribution = "bernoulli"
    >>> predictors = ["displacement", "power", "weight", "acceleration", "year"]
    >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
    >>> gbm = H2OGradientBoostingEstimator(nfolds=3,
    ...                                    distribution=distribution,
    ...                                    fold_assignment="Random")
    >>> gbm.train(y=response_col,
    ...           x=predictors,
    ...           validation_frame=valid,
    ...           training_frame=train)
    >>> gbm.thresholds_and_metric_scores()  # <- Default: return training metric table
    >>> gbm.thresholds_and_metric_scores(train=True, valid=True, xval=True)
    """
    # Forward the dataset selection flags unchanged to the shared metrics dispatcher.
    selection = {'train': train, 'valid': valid, 'xval': xval}
    return self._delegate_to_metrics('thresholds_and_metric_scores', **selection)

def F1(self, thresholds=None, train=False, valid=False, xval=False):
"""
Get the F1 value for a set of thresholds.
Expand Down
26 changes: 26 additions & 0 deletions h2o-py/tests/testdir_algos/gbm/pyunit_gbm_cars_thresholds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from builtins import range
import sys
sys.path.insert(1,"../../../")
import h2o
from tests import pyunit_utils
from h2o.estimators.gbm import H2OGradientBoostingEstimator


def thresholds_gbm():
    """Check that a GBM model and its training performance object return the same thresholds table."""
    response_idx = 1
    frame = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    # The response column is converted to a factor before training with the bernoulli distribution.
    frame[response_idx] = frame[response_idx].asfactor()
    frame.summary()

    model = H2OGradientBoostingEstimator(nfolds=5, distribution="bernoulli")
    model.train(x=list(range(2, 9)), y=response_idx, training_frame=frame)
    model.show()

    # The table read straight from the model must match the one read from the
    # training performance object.
    table_from_model = model.thresholds_and_metric_scores()
    table_from_perf = model.model_performance(train=True).thresholds_and_metric_scores()
    # NOTE(review): confirm assert_H2OTwoDimTable_equal_upto can be called with only the two tables.
    pyunit_utils.assert_H2OTwoDimTable_equal_upto(table_from_model, table_from_perf)


if __name__ == "__main__":
    # Executed as a script: hand the test function to pyunit_utils.standalone_test.
    pyunit_utils.standalone_test(thresholds_gbm)
else:
    # Imported as a module: run the test body immediately.
    thresholds_gbm()
20 changes: 15 additions & 5 deletions h2o-r/h2o-package/R/models.R
Original file line number Diff line number Diff line change
Expand Up @@ -1667,12 +1667,15 @@ h2o.auuc_table <- function(object, train=FALSE, valid=FALSE) {

#' Retrieve the thresholds and metric scores table
#'
#' Retrieves the thresholds and metric scores table from an \linkS4class{H2OBinomialUpliftMetrics}.
#' The table contains indices, thresholds, all cumulative uplift values and cumulative number of observations.
#' Retrieves the thresholds and metric scores table from a \linkS4class{H2OBinomialUpliftMetrics}
#' or a \linkS4class{H2OBinomialMetrics}.
#'
#' The table contains indices, thresholds, all cumulative uplift values and cumulative number of observations for
#' uplift binomial models or thresholds and maximal metric values for binomial models.
#' If "train", "valid", and "xval" parameters are FALSE (default), then the training table is returned. If more
#' than one parameter is set to TRUE, then a named vector of tables is returned, where the names are "train", "valid",
#' and "xval".
#'
#' @param object An \linkS4class{H2OBinomialUpliftMetrics}
#' @param object A \linkS4class{H2OBinomialUpliftMetrics} or a \linkS4class{H2OBinomialMetrics}
#' @param train Retrieve the training thresholds and metric scores table
#' @param valid Retrieve the validation thresholds and metric scores table
#' @param xval Retrieve the cross-validation thresholds and metric scores table
#' @examples
Expand All @@ -1691,11 +1694,11 @@ h2o.auuc_table <- function(object, train=FALSE, valid=FALSE) {
#' h2o.thresholds_and_metric_scores(perf)
#' }
#' @export
h2o.thresholds_and_metric_scores <- function(object, train=FALSE, valid=FALSE) {
h2o.thresholds_and_metric_scores <- function(object, train=FALSE, valid=FALSE, xval=FALSE) {
if( is(object, "H2OModelMetrics") ) return( object@metrics$thresholds_and_metric_score)
if( is(object, "H2OModel") ) {
model.parts <- .model.parts(object)
if ( !train && !valid ) {
if ( !train && !valid && !xval) {
metric <- model.parts$tm@metrics$thresholds_and_metric_score
if ( !is.null(metric) ) return(metric)
}
Expand All @@ -1712,6 +1715,13 @@ h2o.thresholds_and_metric_scores <- function(object, train=FALSE, valid=FALSE) {
v_names <- c(v_names,"valid")
}
}
if ( xval ) {
  # Guard on the cross-validation metrics slot (`xm`) that is dereferenced below, so the
  # warning is only raised when those metrics are really missing.
  if( is.null(model.parts$xm) ) return(invisible(.warn.no.cross.validation()))
  else {
    v <- c(v,model.parts$xm@metrics$thresholds_and_metric_score)
    v_names <- c(v_names,"xval")
  }
}
if ( !is.null(v) ) {
names(v) <- v_names
if ( length(v)==1 ) { return( v[[1]] ) } else { return( v ) }
Expand Down
9 changes: 7 additions & 2 deletions h2o-r/tests/testdir_algos/gbm/runit_GBM_bernoulli.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,22 @@ test.GBM.bernoulli <- function() {
Log.info("R Confusion Matrix:")
print(RCM)
Log.info("H2O Confusion Matrix:")
print(h2o.confusionMatrix(h2o.performance(prostate.h2o)))
perf <- h2o.performance(prostate.h2o)
print(h2o.confusionMatrix(perf))

R.auc <- gbm.roc.area(prostate.data$CAPSULE,R.preds)
Log.info(paste("R AUC:", R.auc, "\tH2O AUC:", h2o.auc(h2o.performance(prostate.h2o))))

# PUBDEV-515
f0 = log(mean(prostate.data$CAPSULE)/(1-mean(prostate.data$CAPSULE)))
f0 <- log(mean(prostate.data$CAPSULE)/(1-mean(prostate.data$CAPSULE)))
print(f0)
print(prostate.h2o@model$init_f)
expect_equal(prostate.h2o@model$init_f, f0, tolerance=1e-4) ## check the intercept term

# GH-15889
ths_model <- h2o.thresholds_and_metric_scores(prostate.h2o)
ths_perf <- h2o.thresholds_and_metric_scores(perf)
expect_equal(ths_model, ths_perf)
}

doTest("GBM Test: prostate.csv with Bernoulli distribution", test.GBM.bernoulli)

0 comments on commit 1662f5d

Please sign in to comment.