Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-15894: Make sure the functions that are supposed to be exported are exported in the R package #15899

Merged
merged 8 commits into from
Nov 10, 2023
4 changes: 2 additions & 2 deletions h2o-docs/src/product/data-science/model_selection.rst
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ Examples
4.8526636043 0.7153633278 0.0069487980 -0.0584344031 0.0791810013 0.4353149856 0.0126060611 -0.0005196059

# Retrieve the list of coefficients for a subset size of 3:
coeff3 <- h2o.coeff(allsubsetsModel, 3)
coeff3 <- h2o.coef(allsubsetsModel, 3)
print(coeff3)
[[3]]
Intercept CAPSULE DCAPS PSA
Expand Down Expand Up @@ -351,7 +351,7 @@ Examples

# To find out which variables get removed, build a new model with ``mode = "backward``
# using the above training information:
bwModel = h2o.modelSelection(x = predictors,
bwModel <- h2o.modelSelection(x = predictors,
y = response,
training_frame = prostate,
seed = 12345,
Expand Down
1 change: 1 addition & 0 deletions h2o-r/h2o-package/R/classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ coef.H2OCoxPHModelSummary <- function(object, ...) object@summary$coefficients
#' @param fit an \code{H2OCoxPHModel} object.
#' @param scale optional numeric specifying the scale parameter of the model.
#' @param k numeric specifying the weight of the equivalent degrees of freedom.
#' @export
extractAIC.H2OCoxPHModel <- function(fit, scale, k = 2, ...) {
fun <- get("extractAIC.coxph", getNamespace("stats"))
if (missing(scale))
Expand Down
4 changes: 0 additions & 4 deletions h2o-r/h2o-package/R/logging.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,6 @@ NULL
write(x = message, file = .h2o.getLogFileName(), append = TRUE)
}

h2o.logIt <- function(m, tmp, commandOrErr, isPost = TRUE) {
# Legacy. Do nothing.
}

#' Start Writing H2O R Logs
#'
#' Begin logging H2o R POST commands and error responses to local disk. Used
Expand Down
114 changes: 114 additions & 0 deletions h2o-r/h2o-package/R/models.R
Original file line number Diff line number Diff line change
Expand Up @@ -755,11 +755,125 @@ predict_leaf_node_assignment.H2OModel <- function(object, newdata, type = c("Pat
#' @export
h2o.predict_leaf_node_assignment <- predict_leaf_node_assignment.H2OModel

#' Use GRLM to transform a frame.
#'
#' @param model H2O GRLM model
#' @param fr H2OFrame
#' @return Returns a transformed frame
#'
#' @examples
#' \dontrun{
#' library(h2o)
#' h2o.init()
#'
#' # Import the USArrests dataset into H2O:
#' arrests <- h2o.importFile(
#' "https://s3.amazonaws.com/h2o-public-test-data/smalldata/pca_test/USArrests.csv"
#' )
#'
#' # Split the dataset into a train and valid set:
#' arrests_splits <- h2o.splitFrame(data = arrests, ratios = 0.8, seed = 1234)
#' train <- arrests_splits[[1]]
#' valid <- arrests_splits[[2]]
#'
#' # Build and train the model:
#' glrm_model = h2o.glrm(training_frame = train,
#' k = 4,
#' loss = "Quadratic",
#' gamma_x = 0.5,
#' gamma_y = 0.5,
#' max_iterations = 700,
#' recover_svd = TRUE,
#' init = "SVD",
#' transform = "STANDARDIZE")
#'
#' # Eval performance:
#' arrests_perf <- h2o.performance(glrm_model)
#'
#' # Generate predictions on a validation set (if necessary):
#' arrests_pred <- h2o.predict(glrm_model, newdata = valid)
#'
#' # Transform the data using the dataset "valid" to retrieve the new coefficients:
#' glrm_transform <- h2o.transform_frame(glrm_model, valid)
#'}
#' @export
h2o.transform_frame <- function(model, fr) {
if (!is(model, "H2OModel") || (is(model, "H2OModel") && model@algorithm != "glrm")) stop("h2o.transform_frame can only be applied to GLRM H2OModel instance.")
return(.newExpr("transform", model@model_id, h2o.getId(fr)))
}

#' Retrieve the results to view the best predictor subsets.
#'
#' @param model H2OModelSelection object
#' @return Returns an H2OFrame object
#'
#' @examples
#' \dontrun{
#' library(h2o)
#' h2o.init()
#'
#' # Import the prostate dataset:
#' prostate <- h2o.importFile("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
#'
#' # Set the predictors & response:
#' predictors <- c("AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS")
#' response <- "GLEASON"
#'
#' # Build & train the model:
#' allsubsetsModel <- h2o.modelSelection(x = predictors,
#' y = response,
#' training_frame = prostate,
#' seed = 12345,
#' max_predictor_number = 7,
#' mode = "allsubsets")
#'
#' # Retrieve the results (H2OFrame containing best model_ids, best_r2_value, & predictor subsets):
#' results <- h2o.result(allsubsetsModel)
#' print(results)
#'
#' # Retrieve the list of coefficients:
#' coeff <- h2o.coef(allsubsetsModel)
#' print(coeff)
#'
#' # Retrieve the list of coefficients for a subset size of 3:
#' coeff3 <- h2o.coef(allsubsetsModel, 3)
#' print(coeff3)
#'
#' # Retrieve the list of standardized coefficients:
#' coeff_norm <- h2o.coef_norm(allsubsetsModel)
#' print(coeff_norm)
#'
#' # Retrieve the list of standardized coefficients for a subset size of 3:
#' coeff_norm3 <- h2o.coef_norm(allsubsetsModel)
#' print(coeff_norm3)
#'
#' # Check the variables that were added during this process:
#' h2o.get_predictors_added_per_step(allsubsetsModel)
#'
#' # To find out which variables get removed, build a new model with ``mode = "backward``
#' # using the above training information:
#' bwModel <- h2o.modelSelection(x = predictors,
#' y = response,
#' training_frame = prostate,
#' seed = 12345,
#' max_predictor_number = 7,
#' mode = "backward")
#' h2o.get_predictors_removed_per_step(bwModel)
#'
#' # To build the fastest model with ModelSelection, use ``mode = "maxrsweep"``:
#' sweepModel <- h2o.modelSelection(x = predictors,
#' y = response,
#' training_frame = prostate,
#' mode = "maxrsweep",
#' build_glm_model = FALSE,
#' max_predictor_number = 3,
#' seed = 12345)
#'
#' # Retrieve the results to view the best predictor subsets:
#' h2o.result(sweepModel)
#' '}
#'
#' @export
h2o.result <- function(model) {
if (!is(model, "H2OModel")) stop("h2o.result can only be applied to H2OModel instances with constant results")
return(.newExpr("result", model@model_id))
Expand Down
1 change: 1 addition & 0 deletions h2o-r/h2o-package/R/tf-idf.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#'
#' @return resulting frame with TF-IDF values.
#' Row format: documentID, word, TF, IDF, TF-IDF
#' @export
h2o.tf_idf <- function(frame, document_id_col, text_col, preprocess=TRUE, case_sensitive=TRUE) {
col_indices <- c()
for (col in c(document_id_col, text_col))
Expand Down
7 changes: 7 additions & 0 deletions h2o-r/h2o-package/R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@
registerS3method("print", "H2OExplanation", "print.H2OExplanation")
.s3_register("repr", "repr_text", "H2OExplanation")
.s3_register("repr", "repr_html", "H2OExplanation")

# CoxPH
.s3_register("survival", "survfit", "H2OCoxPHModel")
Copy link
Contributor Author

@tomasfryda tomasfryda Nov 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registering a class with S3 method in one package to a generic from another package is hard in R (unless it's some of the packages that are loaded by default in the package environment (base) - for those we can use the registerS3Method).


# H2OFrame
.s3_register("stats", "median", "H2OFrame")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

stats package is not imported in the package namespace so it has to be specified here.

Confusingly, min, mean, max are defined in base package that is imported so they don't have to be listed here.


invisible()
}

Expand Down
6 changes: 2 additions & 4 deletions h2o-r/h2o-package/pkgdown/_pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ reference:
- h2o.get_best_model_predictors
- h2o.get_knot_locations
- h2o.get_gam_knot_column_names
- h2o.get_predictor_added_per_step
- h2o.get_predictor_removed_per_step
- h2o.get_predictors_added_per_step
- h2o.get_predictors_removed_per_step
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(missing s in predictorS )

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch!

- h2o.get_regression_influence_diagnostics
- h2o.get_variable_inflation_factors
- h2o.glm
Expand Down Expand Up @@ -333,8 +333,6 @@ reference:
- h2o.tanh
- h2o.target_encode_apply
- h2o.target_encode_create
- h2o.target_encode_fit
- h2o.target_encode_transform
- h2o.targetencoder
- h2o.tf_idf
- h2o.toFrame
Expand Down
32 changes: 22 additions & 10 deletions h2o-r/scripts/h2o-r-test-setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -182,16 +182,28 @@ function() {
strict_version_check <- TRUE
} else if (IS.RUNIT) {
# source h2o-r/h2o-package/R. overrides h2o package load
to_src <- c("aggregator.R", "classes.R", "connection.R","config.R", "constants.R", "logging.R", "communication.R",
"kvstore.R", "frame.R", "targetencoder.R", "astfun.R","automl.R", "import.R", "parse.R", "export.R", "models.R", "edicts.R",
"coxph.R", "coxphutils.R", "gbm.R", "glm.R", "gam.R", "anovaglm.R", "glrm.R", "kmeans.R", "deeplearning.R", "randomforest.R", "generic.R",
"naivebayes.R", "pca.R", "svd.R", "locate.R", "grid.R", "word2vec.R", "w2vutils.R", "stackedensemble.R", "rulefit.R", "modelselection.R",
"predict.R", "xgboost.R", "isolationforest.R", "psvm.R", "segment.R", "tf-idf.R", "explain.R", "permutation_varimp.R", "extendedisolationforest.R",
"upliftrandomforest.R", "infogram.R", "isotonicregression.R", "admissibleml.R", "decisiontree.R", "adaboost.R")

src_path <- paste(h2oRDir,"h2o-package","R",sep=.Platform$file.sep)
invisible(lapply(to_src,function(x){source(paste(src_path, x, sep = .Platform$file.sep))}))
# to_src <- c("aggregator.R", "classes.R", "connection.R","config.R", "constants.R", "logging.R", "communication.R",
# "kvstore.R", "frame.R", "targetencoder.R", "astfun.R","automl.R", "import.R", "parse.R", "export.R", "models.R", "edicts.R",
# "coxph.R", "coxphutils.R", "gbm.R", "glm.R", "gam.R", "anovaglm.R", "glrm.R", "kmeans.R", "deeplearning.R", "randomforest.R", "generic.R",
# "naivebayes.R", "pca.R", "svd.R", "locate.R", "grid.R", "word2vec.R", "w2vutils.R", "stackedensemble.R", "rulefit.R", "modelselection.R",
# "predict.R", "xgboost.R", "isolationforest.R", "psvm.R", "segment.R", "tf-idf.R", "explain.R", "permutation_varimp.R", "extendedisolationforest.R",
# "upliftrandomforest.R", "infogram.R", "isotonicregression.R", "admissibleml.R", "decisiontree.R", "adaboost.R")

# src_path <- paste(h2oRDir,"h2o-package","R",sep=.Platform$file.sep)
# invisible(lapply(to_src,function(x){source(paste(src_path, x, sep = .Platform$file.sep))}))
devtools::load_all(paste(h2oRDir,"h2o-package",sep=.Platform$file.sep), export_all=FALSE)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

export_all = FALSE tells devtools to just export those methods that are supposed to be exported so this helps to find out which methods are not exported but used in the tests.


additional_imports <- c(
maurever marked this conversation as resolved.
Show resolved Hide resolved
".h2o.__remoteSend", ".as.survival.coxph.model", ".as.survival.coxph.summary",
".getExpanded", ".str.list", "is.H2OFrame", ".get.session.property", ".set.session.property",
".h2o.maximizing_metrics", ".h2o.doSafeGET", ".parse.h2oconfig", ".h2o.check_java_version",
"cut.H2OFrame", "as.data.frame.H2OFrame", ".h2o.perfect_auc", ".newExpr", ".h2o.doSafeREST",
".h2o.fromJSON"
)

for (fn in additional_imports)
assign(fn, do.call(`:::`, list(quote(h2o), fn)), envir = .GlobalEnv)

# source h2o-r/tests/runitUtils
to_src <- c("utilsR.R", "pcaR.R", "deeplearningR.R", "glmR.R", "glrmR.R",
"gbmR.R", "kmeansR.R", "naivebayesR.R", "gridR.R", "shared_javapredict.R")
Expand All @@ -209,7 +221,7 @@ function() {
masterSeedFile <- paste(getwd(), "/master_seed", sep = "")
if (file.exists(masterSeedFile)) seed <- read.table(masterSeedFile)[[1]]
setupSeed(seed)
h2o.logIt("[SEED] :", SEED)
cat("[SEED] :", SEED, "\n")

if (!is.na(USERNAME)) {
Log.info(paste0("Authenticating with http-basic, username: ", USERNAME, "."))
Expand Down
7 changes: 5 additions & 2 deletions h2o-r/tests/runitUtils/utilsR.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,13 @@ function(cur.dir, root, root.parent = NULL) {
default.packages <-
function() {
to_require <- c("jsonlite", "RCurl", "RUnit", "R.utils", "testthat", "ade4", "glmnet", "gbm", "ROCR", "e1071",
"tools", "statmod", "fpc", "cluster")
"tools", "statmod", "fpc", "cluster", "survival")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for CoxPH tests

if (Sys.info()['sysname'] == "Windows") {
options(RCurlOptions = list(cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl"))) }
invisible(lapply(to_require,function(x){require(x,character.only=TRUE,quietly=TRUE,warn.conflicts=FALSE)}))
invisible(lapply(to_require,function(x) {
if (!require(x,character.only=TRUE,quietly=TRUE,warn.conflicts=FALSE))
warning(paste("Package", x, "is not available."), call. = FALSE)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mainly to make it easier for us to find out why some test might not work. We import packages and if unsuccessful we used to silently fail, after this change we should see that some package is not available (and R names are sometimes quite obscure so it's hard to guess if we are missing a package or badly exporting something or both (survfit.H2OCoxPHModel)).

}))
}

read.zip<-
Expand Down
2 changes: 1 addition & 1 deletion h2o-r/tests/testdir_algos/coxph/runit_coxph_cancer.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ test.CoxPH.cancer <- function() {
expect_equal(fit$var, .as.survival.coxph.model(hex.fit@model)$var, tolerance = 1e-8)
}

doTest("CoxPH: Cancer Test", test.CoxPH.cancer)
doTest("CoxPH: Cancer Test", test.CoxPH.cancer)