From 0df0ebc254c680109e379e9fa104d92169fad364 Mon Sep 17 00:00:00 2001
From: sebhrusen
Date: Mon, 18 Sep 2023 19:57:59 +0200
Subject: [PATCH] add R tests

---
 .../H2O_Module/pyunit_h2oexport_file.py       |  4 ++
 ..._export_file.R => runit_export_file_csv.R} | 14 +++--
 .../testdir_misc/runit_export_file_parquet.R  | 55 +++++++++++++++++++
 .../runit_export_parquet_multipart.R          | 45 ---------------
 4 files changed, 68 insertions(+), 50 deletions(-)
 rename h2o-r/tests/testdir_misc/{runit_export_file.R => runit_export_file_csv.R} (84%)
 create mode 100644 h2o-r/tests/testdir_misc/runit_export_file_parquet.R
 delete mode 100644 h2o-r/tests/testdir_misc/runit_export_parquet_multipart.R

diff --git a/h2o-py/tests/testdir_apis/H2O_Module/pyunit_h2oexport_file.py b/h2o-py/tests/testdir_apis/H2O_Module/pyunit_h2oexport_file.py
index 964bc6b5e05e..7d3f92ae467c 100644
--- a/h2o-py/tests/testdir_apis/H2O_Module/pyunit_h2oexport_file.py
+++ b/h2o-py/tests/testdir_apis/H2O_Module/pyunit_h2oexport_file.py
@@ -37,6 +37,7 @@ def export_file_parquet():
     if os.path.isdir(export_dir):
         shutil.rmtree(export_dir, ignore_errors=True)
     h2o.export_file(data, path=export_dir, format='parquet')
+
     assert os.path.isdir(export_dir)
     assert any(os.path.splitext(f)[1] == '.crc' for f in os.listdir(export_dir))
 
@@ -48,6 +49,9 @@ def export_file_parquet_no_checksum():
     if os.path.isdir(export_dir):
         shutil.rmtree(export_dir, ignore_errors=True)
     h2o.export_file(data, path=export_dir, format='parquet', write_checksum=False)
+
+    assert os.path.isdir(export_dir)
+    assert os.listdir(export_dir)
     assert not any(os.path.splitext(f)[1] == '.crc' for f in os.listdir(export_dir))
 
 
diff --git a/h2o-r/tests/testdir_misc/runit_export_file.R b/h2o-r/tests/testdir_misc/runit_export_file_csv.R
similarity index 84%
rename from h2o-r/tests/testdir_misc/runit_export_file.R
rename to h2o-r/tests/testdir_misc/runit_export_file_csv.R
index ea51a108f500..32ac1130e119 100644
--- a/h2o-r/tests/testdir_misc/runit_export_file.R
+++ b/h2o-r/tests/testdir_misc/runit_export_file_csv.R
@@ -25,7 +25,7 @@ test.export.file <- function(parts) {
   dname <- file.path(sandbox(), fname)
 
   Log.info("Exporting File...")
-  h2o.exportFile(mypred, dname, parts = parts)
+  h2o.exportFile(mypred, dname, parts = parts, force = TRUE)
 
   Log.info("Comparing file with R...")
   rfiles <- ifelse(parts > 1, list.files(dname, full.names = TRUE), dname)
@@ -42,10 +42,14 @@ test.export.file <- function(parts) {
   print(head(H.pred))
 
   expect_equal(R.pred, H.pred)
+  return(dname)
 }
 
-test.export.file.single <- function() test.export.file(1)
-test.export.file.multipart <- function() test.export.file(2)
+test.export.file.csv.single <- function() test.export.file(1)
+test.export.file.csv.multipart <- function() test.export.file(2)
+
+doSuite("Testing Exporting Files CSV", makeSuite(
+  test.export.file.csv.single,
+  test.export.file.csv.multipart
+))
 
-doTest("Testing Exporting Files (single file)", test.export.file.single)
-doTest("Testing Exporting Files (part files)", test.export.file.multipart)
diff --git a/h2o-r/tests/testdir_misc/runit_export_file_parquet.R b/h2o-r/tests/testdir_misc/runit_export_file_parquet.R
new file mode 100644
index 000000000000..640df80dd85d
--- /dev/null
+++ b/h2o-r/tests/testdir_misc/runit_export_file_parquet.R
@@ -0,0 +1,55 @@
+setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
+source("../../scripts/h2o-r-test-setup.R")
+
+
+# Export a frame to Parquet with h2o.exportFile, re-import the exported folder and compare it with the original frame to check for parity.
+
+
+test.export.file <- function(path, write_checksum = TRUE) {
+  data <- h2o.uploadFile(locate(path))
+
+  fname <- paste(paste0(sample(letters, 3, replace = TRUE), collapse = ""),
+                 paste0(sample(0:9, 3, replace = TRUE), collapse = ""), paste0(data$id, ".parquet"), sep = "_")
+  dname <- file.path(sandbox(), fname)
+
+  Log.info("Exporting File...")
+  h2o.exportFile(data, dname, format = "parquet", force = TRUE, write_checksum = write_checksum)
+
+  files <- list.files(dname, full.names = TRUE)
+  print(files)
+
+  Log.info("Comparing file with R...")
+  rfiles <- if (length(files) > 1) files else dname  # scalar if/else: ifelse() would keep only the first path
+  Log.info(sprintf("Results stored in files: %s", paste(rfiles, collapse = ", ")))
+
+  imported <- h2o.importFolder(path = dname, pattern = "part-m-")
+
+  if (length(files) == 1) {
+    expect_equal(imported, data)
+  } else {
+    expect_equal(mean(imported), mean(data))  # several files in the export dir: compare means only
+  }
+  return(dname)  # export directory, so callers can inspect the files written
+}
+
+test.export.file.prostate <- function() test.export.file("smalldata/prostate/prostate.csv")
+test.export.file.airquality_train1 <- function() test.export.file("smalldata/testng/airquality_train1.csv")
+test.export.file.autoclaims <- function() test.export.file("smalldata/gbm_test/autoclaims.csv")
+test.export.file.item_demand <- function() test.export.file("smalldata/demos/item_demand.csv")
+
+test.export.file.titanic_expanded <- function() {
+  export_dir <- test.export.file("smalldata/titanic/titanic_expanded.csv")
+  expect_gt(length(list.files(path = export_dir, pattern = "\\.crc$", all.files = TRUE, full.names = FALSE)), 0)
+}
+test.export.file.titanic_expanded.no_checksum <- function() {
+  export_dir <- test.export.file("smalldata/titanic/titanic_expanded.csv", write_checksum = FALSE)
+  expect_equal(length(list.files(path = export_dir, pattern = "\\.crc$", all.files = TRUE, full.names = FALSE)), 0)
+}
+doSuite("Testing Exporting Parquet Files", makeSuite(
+  test.export.file.prostate,
+  test.export.file.airquality_train1,
+  test.export.file.autoclaims,
+  test.export.file.item_demand,
+  test.export.file.titanic_expanded,
+  test.export.file.titanic_expanded.no_checksum
+))
diff --git a/h2o-r/tests/testdir_misc/runit_export_parquet_multipart.R b/h2o-r/tests/testdir_misc/runit_export_parquet_multipart.R
deleted file mode 100644
index fc0fd0bdf478..000000000000
--- a/h2o-r/tests/testdir_misc/runit_export_parquet_multipart.R
+++ /dev/null
@@ -1,45 +0,0 @@
-setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
-source("../../scripts/h2o-r-test-setup.R")
-
-
-#Export file with h2o.export_file and compare with R counterpart when re importing file to check for parity.
-
-
-test.export.file <- function(path) {
-    data <- h2o.uploadFile(locate(path))
-
-    fname <- paste(paste0(sample(letters, 3, replace = TRUE), collapse = ""),
-                   paste0(sample(0:9, 3, replace = TRUE), collapse = ""), "prostate.parquet", sep = "_")
-    dname <- file.path(sandbox(), fname)
-
-    Log.info("Exporting File...")
-    h2o.exportFile(data, dname, format = "parquet", force=TRUE)
-
-    files <- list.files(dname, full.names = TRUE)
-    print(files)
-
-    Log.info("Comparing file with R...")
-    rfiles <- ifelse( length(files) > 1, list.files(dname, full.names = TRUE), dname)
-    Log.info(sprintf("Results stored in files: %s", paste(rfiles, collapse = ", ")))
-
-    imported <- h2o.importFolder(path = dname, pattern = "part-m-")
-
-    if (length(files) == 1) {
-      expect_equal(imported, data)
-    } else {
-      expect_equal(mean(imported), mean(data))
-    }
-
-}
-
-    test.export.file.prostate <- function() test.export.file("smalldata/prostate/prostate.csv")
-    test.export.file.titanic_expanded <- function() test.export.file("smalldata/titanic/titanic_expanded.csv")
-    test.export.file.airquality_train1 <- function() test.export.file("smalldata/testng/airquality_train1.csv")
-    test.export.file.autoclaims <- function() test.export.file("smalldata/gbm_test/autoclaims.csv")
-    test.export.file.item_demand <- function() test.export.file("smalldata/demos/item_demand.csv")
-
-    doTest("Testing Exporting Parquet Files (prostate)", test.export.file.prostate)
-    doTest("Testing Exporting Parquet Files (titanic_expanded)", test.export.file.titanic_expanded)
-    doTest("Testing Exporting Parquet Files (airquality_train1)", test.export.file.airquality_train1)
-    doTest("Testing Exporting Parquet Files (autoclaims)", test.export.file.autoclaims)
-    doTest("Testing Exporting Parquet Files (item_demand)", test.export.file.item_demand)