Skip to content

Commit

Permalink
add R tests
Browse files Browse the repository at this point in the history
  • Loading branch information
sebhrusen committed Sep 18, 2023
1 parent 70c91c6 commit 0df0ebc
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 50 deletions.
4 changes: 4 additions & 0 deletions h2o-py/tests/testdir_apis/H2O_Module/pyunit_h2oexport_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def export_file_parquet():
if os.path.isdir(export_dir):
shutil.rmtree(export_dir, ignore_errors=True)
h2o.export_file(data, path=export_dir, format='parquet')

assert os.path.isdir(export_dir)
assert any(os.path.splitext(f)[1] == '.crc' for f in os.listdir(export_dir))

Expand All @@ -48,6 +49,9 @@ def export_file_parquet_no_checksum():
if os.path.isdir(export_dir):
shutil.rmtree(export_dir, ignore_errors=True)
h2o.export_file(data, path=export_dir, format='parquet', write_checksum=False)

assert os.path.isdir(export_dir)
assert os.listdir(export_dir)
assert not any(os.path.splitext(f)[1] == '.crc' for f in os.listdir(export_dir))


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ test.export.file <- function(parts) {
dname <- file.path(sandbox(), fname)

Log.info("Exporting File...")
h2o.exportFile(mypred, dname, parts = parts)
h2o.exportFile(mypred, dname, parts = parts, force = TRUE)

Log.info("Comparing file with R...")
rfiles <- ifelse(parts > 1, list.files(dname, full.names = TRUE), dname)
Expand All @@ -42,10 +42,14 @@ test.export.file <- function(parts) {
print(head(H.pred))

expect_equal(R.pred, H.pred)
return(dname)
}

# Zero-argument wrappers that run the CSV export test with a fixed `parts` value.
test.export.file.single <- function() {
  test.export.file(1)
}
test.export.file.multipart <- function() {
  test.export.file(2)
}
test.export.file.csv.single <- function() {
  test.export.file(1)
}
test.export.file.csv.multipart <- function() {
  test.export.file(2)
}

# Run the CSV export tests as one suite.
# No trailing comma after the last test: it would leave an empty argument in
# the makeSuite(...) call, which R rejects when the arguments are evaluated.
doSuite("Testing Exporting Files CSV", makeSuite(
  test.export.file.csv.single,
  test.export.file.csv.multipart
))

# Standalone registrations of the single-file and part-file export tests.
# NOTE(review): the doSuite() call above covers the same two scenarios via the
# *.csv.* wrappers — confirm whether both registrations are meant to coexist.
doTest("Testing Exporting Files (single file)", test.export.file.single)
doTest("Testing Exporting Files (part files)", test.export.file.multipart)
55 changes: 55 additions & 0 deletions h2o-r/tests/testdir_misc/runit_export_file_parquet.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
source("../../scripts/h2o-r-test-setup.R")


# Export a frame to Parquet with h2o.exportFile, re-import the exported files, and check parity with the original frame.


# Upload the dataset at `path`, export it as Parquet, re-import the exported
# part files and check that the result matches the original frame.
#
# Args:
#   path: dataset path, resolved through locate().
#   write_checksum: forwarded to h2o.exportFile(); defaults to TRUE.
#
# Returns:
#   The export directory path, so callers can inspect what was written.
test.export.file <- function(path, write_checksum = TRUE) {
  data <- h2o.uploadFile(locate(path))

  # Export name: three random letters, three random digits, then a
  # "<...>.parquet" component.
  # NOTE(review): `data$id` is kept as in the original — confirm it yields the
  # intended name component for an uploaded frame.
  fname <- paste(paste0(sample(letters, 3, replace = TRUE), collapse = ""),
                 paste0(sample(0:9, 3, replace = TRUE), collapse = ""),
                 paste0(data$id, ".parquet"), sep = "_")
  dname <- file.path(sandbox(), fname)

  Log.info("Exporting File...")
  h2o.exportFile(data, dname, format = "parquet", force = TRUE, write_checksum = write_checksum)

  files <- list.files(dname, full.names = TRUE)
  print(files)

  Log.info("Comparing file with R...")
  # Plain if/else instead of ifelse(): with a scalar test, ifelse() returns a
  # length-1 result, so only the first exported file would be logged.
  rfiles <- if (length(files) > 1) files else dname
  Log.info(sprintf("Results stored in files: %s", paste(rfiles, collapse = ", ")))

  imported <- h2o.importFolder(path = dname, pattern = "part-m-")

  if (length(files) == 1) {
    # Single part file: compare the frames directly.
    expect_equal(imported, data)
  } else {
    # Several part files: compare the means of the two frames instead.
    expect_equal(mean(imported), mean(data))
  }
  return(dname)
}

# One zero-argument wrapper per dataset, each running the Parquet export
# round-trip with the default write_checksum.
test.export.file.prostate <- function() {
  test.export.file("smalldata/prostate/prostate.csv")
}
test.export.file.airquality_train1 <- function() {
  test.export.file("smalldata/testng/airquality_train1.csv")
}
test.export.file.autoclaims <- function() {
  test.export.file("smalldata/gbm_test/autoclaims.csv")
}
test.export.file.item_demand <- function() {
  test.export.file("smalldata/demos/item_demand.csv")
}

# Default export: expects at least one .crc file in the export directory.
test.export.file.titanic_expanded <- function() {
  export_dir <- test.export.file("smalldata/titanic/titanic_expanded.csv")
  # all.files = TRUE so that dot-prefixed file names are listed as well.
  crc_files <- list.files(path = export_dir, pattern = "\\.crc$",
                          all.files = TRUE, full.names = FALSE)
  expect_gt(length(crc_files), 0)
}
# Export with write_checksum = FALSE: expects no .crc file in the export directory.
test.export.file.titanic_expanded.no_checksum <- function() {
  export_dir <- test.export.file("smalldata/titanic/titanic_expanded.csv",
                                 write_checksum = FALSE)
  # all.files = TRUE so that dot-prefixed file names are listed as well.
  crc_files <- list.files(path = export_dir, pattern = "\\.crc$",
                          all.files = TRUE, full.names = FALSE)
  expect_equal(length(crc_files), 0)
}
# Register all Parquet export tests as one suite: four dataset round-trips plus
# the two titanic_expanded checksum checks (with and without .crc files).
doSuite("Testing Exporting Parquet Files", makeSuite(
test.export.file.prostate,
test.export.file.airquality_train1,
test.export.file.autoclaims,
test.export.file.item_demand,
test.export.file.titanic_expanded,
test.export.file.titanic_expanded.no_checksum
))
45 changes: 0 additions & 45 deletions h2o-r/tests/testdir_misc/runit_export_parquet_multipart.R

This file was deleted.

0 comments on commit 0df0ebc

Please sign in to comment.