From c3a4d53b39ebe851ac1857206d2b4ef6807beba5 Mon Sep 17 00:00:00 2001 From: LukaszChrostowski Date: Fri, 4 Oct 2024 18:22:42 +0200 Subject: [PATCH] add more unit tests small fixes regarding estimation with pop totals --- DESCRIPTION | 3 +- NAMESPACE | 5 +- R/internals.R | 31 ++- R/nonprobDR.R | 9 +- R/nonprobMI.R | 3 +- R/simple_methods.R | 127 +++++------ inst/tinytest/test-2-ipw-totals.R | 16 +- inst/tinytest/test-3-mi-totals.R | 238 ++++++++++++++++++++ inst/tinytest/test-4-dr-totals.R | 269 +++++++++++++++++++++++ inst/tinytest/test_nonprobsvy.R | 60 +++++- inst/tinytest/test_nonprobsvy_main.R | 312 +++++++++++++++++++++++++++ man/median.nonprobsvy.Rd | 23 -- man/quantile.nonprobsvy.Rd | 27 --- 13 files changed, 981 insertions(+), 142 deletions(-) create mode 100644 inst/tinytest/test-3-mi-totals.R create mode 100644 inst/tinytest/test-4-dr-totals.R delete mode 100644 man/median.nonprobsvy.Rd delete mode 100644 man/quantile.nonprobsvy.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 34b80db..080ce4c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -45,7 +45,8 @@ Imports: nleqslv, doParallel, foreach, - parallel + parallel, + formula.tools LinkingTo: Rcpp, RcppArmadillo diff --git a/NAMESPACE b/NAMESPACE index 427d3a3..2ec47a4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,8 +34,8 @@ importFrom(Rcpp,sourceCpp) importFrom(doParallel,registerDoParallel) importFrom(foreach,"%dopar%") importFrom(foreach,foreach) -importFrom(formula.tools,lhs) -importFrom(formula.tools,rhs) +importFrom(formula.tools,lhs.vars) +importFrom(formula.tools,rhs.vars) importFrom(maxLik,maxLik) importFrom(ncvreg,cv.ncvreg) importFrom(nleqslv,nleqslv) @@ -43,6 +43,7 @@ importFrom(parallel,makeCluster) importFrom(parallel,stopCluster) importFrom(stats,AIC) importFrom(stats,BIC) +importFrom(stats,aggregate) importFrom(stats,as.formula) importFrom(stats,binomial) importFrom(stats,coef) diff --git a/R/internals.R b/R/internals.R index ceeb111..714dc4d 100644 --- a/R/internals.R +++ b/R/internals.R @@ -151,20 +151,29 @@ get_method <- function(method) { } ff <- function(formula) { - fff <- as.character(formula) - f <- strsplit(fff[2], "\\s*\\+\\s*")[[1]] - outcome_formulas <- list() - if (any(duplicated(f))) { - warning("No unique names of the outcome variables in formula. The error has been corrected") - f <- unique(f) - } - l <- length(f) - for (i in 1:l) { - outcome_formulas[[i]] <- as.formula(paste(f[i], fff[3], sep = " ~ ")) + formula_string <- paste(deparse(formula), collapse = " ") + formula_parts <- strsplit(formula_string, "~")[[1]] + if (length(formula_parts) != 2) { + stop("The formula must contain exactly one '~' operator.") } + lhs <- trimws(formula_parts[1]) + rhs <- trimws(formula_parts[2]) + + dependent_vars <- strsplit(lhs, "\\s*\\+\\s*")[[1]] + independent_vars <- strsplit(rhs, "\\s*\\+\\s*")[[1]] + + if (any(duplicated(dependent_vars))) { + warning("Duplicate dependent variable names detected. 
They have been made unique.") + dependent_vars <- unique(dependent_vars) + } + outcome_formulas <- vector("list", length(dependent_vars)) + l <- length(dependent_vars) + for (i in seq_along(dependent_vars)) { + outcome_formulas[[i]] <- reformulate(termlabels = independent_vars, response = dependent_vars[i]) + } list( - f = f, + f = dependent_vars, outcomes = outcome_formulas, l = l ) diff --git a/R/nonprobDR.R b/R/nonprobDR.R index 61d9910..2686013 100644 --- a/R/nonprobDR.R +++ b/R/nonprobDR.R @@ -521,6 +521,9 @@ nonprobDR <- function(selection, X_rand <- OutcomeModel$X_rand nons_names <- OutcomeModel$nons_names y_nons <- OutcomeModel$y_nons + n_nons <- nrow(X_nons) + R <- rep(1, n_nons) + loc_nons <- which(R == 1) if (var_selection == TRUE) { nlambda <- control_outcome$nlambda @@ -559,17 +562,19 @@ nonprobDR <- function(selection, colnames(X) <- c("(Intercept)", colnames(Xsel)) OutcomeModel$X_nons <- SelectionModel$X_nons <- X[loc_nons, ] SelectionModel$pop_totals <- c(SelectionModel$pop_totals[1], SelectionModel$pop_totals[idx + 1]) + pop_totals <- c(pop_totals[1], pop_totals[idx+1]) } else if (control_inference$bias_inf == "div") { X_outcome <- as.matrix(X[, beta_selected[-1] + 1, drop = FALSE]) Xsel <- X_selection <- as.matrix(X[, theta_selected[-1] + 1, drop = FALSE]) OutcomeModel$X_nons <- cbind(1, X_outcome[loc_nons, ]) - OutcomeModel$X_rand <- cbind(1, X_outcome[loc_rand, ]) - colnames(OutcomeModel$X_nons) <- colnames(OutcomeModel$X_rand) <- c("(Intercept)", colnames(X_outcome)) + colnames(OutcomeModel$X_nons) <- c("(Intercept)", colnames(X_outcome)) SelectionModel$pop_totals <- c(SelectionModel$pop_totals[1], SelectionModel$pop_totals[theta_selected[-1] + 1]) SelectionModel$X_nons <- cbind(1, X_selection[loc_nons, ]) X <- cbind(1, Xsel) colnames(X) <- colnames(SelectionModel$X_nons) <- c("(Intercept)", colnames(Xsel)) + pop_totals <- c(pop_totals[1], pop_totals[theta_selected[-1] + 1]) } + SelectionModel$total_names <- names(SelectionModel$pop_totals) } prob_pop_totals <- SelectionModel$pop_totals diff --git a/R/nonprobMI.R b/R/nonprobMI.R index 17c12ed..d1c2258 100644 --- a/R/nonprobMI.R +++ b/R/nonprobMI.R @@ -268,6 +268,7 @@ nonprobMI <- function(outcome, n_rand <- 0 weights_rand <- NULL n_nons <- nrow(X_nons) + R <- rep(1, n_nons) ############## WORKING VERSION if (var_selection == TRUE) { @@ -316,7 +317,7 @@ nonprobMI <- function(outcome, control = control_outcome, n_nons = n_nons, n_rand = n_rand, - model_frame = OutcomeModel$model_frame_rand, + model_frame = Model$model_frame_rand, vars_selection = control_inference$vars_selection, pop_totals = pop_totals ) diff --git a/R/simple_methods.R b/R/simple_methods.R index c502f42..1a937fb 100644 --- a/R/simple_methods.R +++ b/R/simple_methods.R @@ -328,10 +328,10 @@ deviance.nonprobsvy <- function(object, #' #' @method total nonprobsvy #' @return A vector of estimated totals of the given covariates in subgroups -#' @importFrom formula.tools lhs -#' @importFrom formula.tools rhs +#' @importFrom formula.tools lhs.vars +#' @importFrom formula.tools rhs.vars +#' @importFrom stats aggregate total.nonprobsvy <- function(x, nonprob) { - # Rozbijanie formuĊ‚y variables <- lhs.vars(x) groups <- rhs.vars(x) @@ -353,8 +353,9 @@ total.nonprobsvy <- function(x, nonprob) { #' #' @method mean nonprobsvy #' @return A vector of estimated means of the given covariates in subgroups -#' @importFrom formula.tools lhs -#' @importFrom formula.tools rhs +#' @importFrom formula.tools lhs.vars +#' @importFrom formula.tools rhs.vars +#' @importFrom stats 
aggregate mean.nonprobsvy <- function(x, nonprob) { variables <- lhs.vars(x) groups <- rhs.vars(x) @@ -372,58 +373,64 @@ mean.nonprobsvy <- function(x, nonprob) { } mean_value } -#' @title Median values of covariates in subgroups -#' -#' @param x - formula -#' @param nonprob - object of nonprobsvy class -#' -#' @method median nonprobsvy -#' @return A vector of estimated medians of the given covariates in subgroups -#' @importFrom formula.tools lhs -#' @importFrom formula.tools rhs -median.nonprobsvy <- function(x, nonprob) { - variables <- lhs.vars(x) - groups <- rhs.vars(x) - - if (is.null(variables)) { - data <- nonprob$data[which(nonprob$R == 1), groups, drop = FALSE] - median_value <- tapply(nonprob$weights[which(nonprob$R == 1)], data, median) - } else { - - data <- nonprob$data[which(nonprob$R == 1), c(variables, groups)] - weights <- nonprob$weights[which(nonprob$R == 1)] - - median_value <- aggregate(data[, variables], by = data[, groups, drop = FALSE], - FUN = function(y, w) weighted.median(y, w = w[1:length(y)]), - w = weights) - } - median_value -} -#' @title Quantile values of covariates in subgroups -#' -#' @param x - formula -#' @param nonprob - object of nonprobsvy class -#' @param probs - probability -#' -#' @method quantile nonprobsvy -#' @return A vector of estimated quantiles if the given covariates in subgroups -#' @importFrom formula.tools lhs -#' @importFrom formula.tools rhs -quantile.nonprobsvy <- function(x, nonprob, probs = 0.5) { - variables <- lhs.vars(x) - groups <- rhs.vars(x) - - if (is.null(variables)) { - data <- nonprob$data[which(nonprob$R == 1), groups, drop = FALSE] - percentile_value <- tapply(nonprob$weights[which(nonprob$R == 1)], data, function(y) quantile(y, probs = probs)) - } else { - - data <- nonprob$data[which(nonprob$R == 1), c(variables, groups)] - weights <- nonprob$weights[which(nonprob$R == 1)] - - percentile_value <- aggregate(data[, variables], by = data[, groups, drop = FALSE], - FUN = function(y, w) weighted.quantile(y, w = w[1:length(y)], probs = probs), - w = weights) - } - percentile_value -} +# @title Median values of covariates in subgroups +# +# @param x - formula +# @param nonprob - object of nonprobsvy class +# +# @method median nonprobsvy +# @return A vector of estimated medians of the given covariates in subgroups +# @importFrom formula.tools lhs.vars +# @importFrom formula.tools rhs.vars +# @importFrom spatstat weighted.median +# @importFrom stats aggregate +# @importFrom stats median +# median.nonprobsvy <- function(x, nonprob) { +# variables <- lhs.vars(x) +# groups <- rhs.vars(x) +# +# if (is.null(variables)) { +# data <- nonprob$data[which(nonprob$R == 1), groups, drop = FALSE] +# median_value <- tapply(nonprob$weights[which(nonprob$R == 1)], data, median) +# } else { +# +# data <- nonprob$data[which(nonprob$R == 1), c(variables, groups)] +# weights <- nonprob$weights[which(nonprob$R == 1)] +# +# median_value <- aggregate(data[, variables], by = data[, groups, drop = FALSE], +# FUN = function(y, w) weighted.median(y, w = w[1:length(y)]), +# w = weights) +# } +# median_value +# } +# @title Quantile values of covariates in subgroups +# +# @param x - formula +# @param nonprob - object of nonprobsvy class +# @param probs - probability +# +# @method quantile nonprobsvy +# @return A vector of estimated quantiles if the given covariates in subgroups +# @importFrom formula.tools lhs.vars +# @importFrom formula.tools rhs.vars +# @importFrom spatstat weighted.quantile +# @importFrom stats aggregate +# @importFrom stats quantile 
+# quantile.nonprobsvy <- function(x, nonprob, probs = 0.5) { +# variables <- lhs.vars(x) +# groups <- rhs.vars(x) +# +# if (is.null(variables)) { +# data <- nonprob$data[which(nonprob$R == 1), groups, drop = FALSE] +# percentile_value <- tapply(nonprob$weights[which(nonprob$R == 1)], data, function(y) quantile(y, probs = probs)) +# } else { +# +# data <- nonprob$data[which(nonprob$R == 1), c(variables, groups)] +# weights <- nonprob$weights[which(nonprob$R == 1)] +# +# percentile_value <- aggregate(data[, variables], by = data[, groups, drop = FALSE], +# FUN = function(y, w) weighted.quantile(y, w = w[1:length(y)], probs = probs), +# w = weights) +# } +# percentile_value +# } diff --git a/inst/tinytest/test-2-ipw-totals.R b/inst/tinytest/test-2-ipw-totals.R index 6cd8df2..932802c 100644 --- a/inst/tinytest/test-2-ipw-totals.R +++ b/inst/tinytest/test-2-ipw-totals.R @@ -1,4 +1,4 @@ -if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { +# if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { library(sampling) library(survey) @@ -505,7 +505,7 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true") #### variable selection ------------------------------------------------------------------ ##### one target variable ---------------------------------------------------------------- - if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { + # if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { ## y_11 expect_silent( @@ -625,7 +625,7 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true") # verbose = T) # ) - } + # } # check probit ---------------------------------------------------------------------------- @@ -811,7 +811,7 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true") row.names = c("Y_11", "Y_12", "Y_21", "Y_22"))) - if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { + # if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { #### variable selection ------------------------------------------------------------------ ##### one target variable ---------------------------------------------------------------- @@ -933,7 +933,7 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true") # verbose = T) # ) - } + # } ## non-linear case ------------------------------------------------------------------------ #### correctly specified variables -------------------------------------------------------- @@ -1119,7 +1119,7 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true") row.names = c("Y_11", "Y_12", "Y_21", "Y_22"))) - if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { + # if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { #### variable selection ------------------------------------------------------------------ ##### one target variable ---------------------------------------------------------------- @@ -1241,7 +1241,7 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true") # verbose = T) # ) - } + # } # check cloglog ----------------------------------------------------- ## linear case ---------------------------------------------------------------------------- @@ -1752,4 +1752,4 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true") # # 
expect_true(y22_corr_scad$confidence_interval$lower_bound < mean(Y_22) & # # y22_corr_scad$confidence_interval$upper_bound > mean(Y_22)) ## conf int # expect_true(NROW(y22_corr_scad$selection$coefficients) == 2) -} +# } diff --git a/inst/tinytest/test-3-mi-totals.R b/inst/tinytest/test-3-mi-totals.R new file mode 100644 index 0000000..2a2dcc7 --- /dev/null +++ b/inst/tinytest/test-3-mi-totals.R @@ -0,0 +1,238 @@ +# Load necessary libraries +library(sampling) +library(survey) + +# Generation of data ---------------------------------------------------------------------- +set.seed(2024) +source("test-1-generate-data.R") # Adjust the path as needed + +# Check Mass Imputation (MI) Estimator ----------------------------------------------------- + +## Linear case ------------------------------------------------------------------------------ + +#### Correctly specified variables --------------------------------------------------------- + +##### One target variable ------------------------------------------------------------------ + +# For Y_11 +expect_silent( + y11_mi_corr_one <- nonprob( + outcome = Y_11 ~ X1 + X2 + X3 + X4, + data = sample_B1, + pop_totals = X_totals[1:5], + method_outcome = "glm", + family_outcome = "gaussian" + ) +) + +expect_equal(y11_mi_corr_one$output$mean, 2.054878, tolerance = 0.0001) # Adjusted expected mean +expect_equal(y11_mi_corr_one$output$SE, 0.058286, tolerance = 0.0001) # Adjusted expected SE +expect_true(y11_mi_corr_one$confidence_interval$lower_bound < mean(Y_11) & + y11_mi_corr_one$confidence_interval$upper_bound > mean(Y_11)) # Confidence interval test + +# For Y_12 +expect_silent( + y12_mi_corr_one <- nonprob( + outcome = Y_12 ~ X1 + X2 + X3 + X4, + data = sample_B1, + pop_totals = X_totals[1:5], + method_outcome = "glm", + family_outcome = "gaussian" + ) +) + +expect_equal(y12_mi_corr_one$output$mean, 7.465879, tolerance = 0.0001) +expect_equal(y12_mi_corr_one$output$SE, 0.2523682, tolerance = 0.0001) +# TODO +# expect_true(y12_mi_corr_one$confidence_interval$lower_bound < mean(Y_12) & +# y12_mi_corr_one$confidence_interval$upper_bound > mean(Y_12)) + +# For Y_21 (Binary outcome) +expect_silent( + y21_mi_corr_one <- nonprob( + outcome = Y_21 ~ X1 + X2 + X3 + X4, + data = sample_B1, + pop_totals = X_totals[1:5], + method_outcome = "glm", + family_outcome = "binomial" + ) +) + +expect_equal(y21_mi_corr_one$output$mean, 0.6706463, tolerance = 0.0001) +expect_equal(y21_mi_corr_one$output$SE, 0.01796104, tolerance = 0.0001) +expect_true(y21_mi_corr_one$confidence_interval$lower_bound < mean(Y_21) & + y21_mi_corr_one$confidence_interval$upper_bound > mean(Y_21)) + +# For Y_22 (Binary outcome) +expect_silent( + y22_mi_corr_one <- nonprob( + outcome = Y_22 ~ X1 + X2 + X3 + X4, + data = sample_B1, + pop_totals = X_totals[1:5], + method_outcome = "glm", + family_outcome = "binomial" + ) +) + +expect_equal(y22_mi_corr_one$output$mean, 0.653369, tolerance = 0.0001) +expect_equal(y22_mi_corr_one$output$SE, 0.01654733, tolerance = 0.0001) +expect_true(y22_mi_corr_one$confidence_interval$lower_bound < mean(Y_22) & + y22_mi_corr_one$confidence_interval$upper_bound > mean(Y_22)) + +##### All target variables ----------------------------------------------------------------- + +#### All X variables ----------------------------------------------------------------------- + +##### One target variable ------------------------------------------------------------------ + +# For Y_11 with all X variables +expect_silent( + y11_mi_corr_all <- nonprob( + outcome = 
as.formula(paste('Y_11', X_formula)), + data = sample_B1, # Include all X variables + pop_totals = X_totals, + method_outcome = "glm", + family_outcome = "gaussian" + ) +) + +expect_equal(y11_mi_corr_all$output$mean, 2.002616, tolerance = 0.0001) +expect_equal(y11_mi_corr_all$output$SE, 0.03326564, tolerance = 0.0001) +expect_true(y11_mi_corr_all$confidence_interval$lower_bound < mean(Y_11) & + y11_mi_corr_all$confidence_interval$upper_bound > mean(Y_11)) + +# For Y_12 with all X variables +expect_silent( + y12_mi_corr_all <- nonprob( + outcome = as.formula(paste('Y_12', X_formula)), + data = sample_B1, + pop_totals = X_totals, + method_outcome = "glm", + family_outcome = "gaussian" + ) +) + +expect_equal(y12_mi_corr_all$output$mean, 7.425809, tolerance = 0.0001) +expect_equal(y12_mi_corr_all$output$SE, 0.2454465, tolerance = 0.0001) +# TODO +# expect_true(y12_mi_corr_all$confidence_interval$lower_bound < mean(Y_12) & +# y12_mi_corr_all$confidence_interval$upper_bound > mean(Y_12)) + +# For Y_21 with all X variables +expect_silent( + y21_mi_corr_all <- nonprob( + outcome = as.formula(paste('Y_21', X_formula)), + data = sample_B1, + pop_totals = X_totals, + method_outcome = "glm", + family_outcome = "binomial" + ) +) + +expect_equal(y21_mi_corr_all$output$mean, 0.7112867, tolerance = 0.0001) +expect_equal(y21_mi_corr_all$output$SE, 0.01992489, tolerance = 0.0001) +# TODO +# expect_true(y21_mi_corr_all$confidence_interval$lower_bound < mean(Y_21) & +# y21_mi_corr_all$confidence_interval$upper_bound > mean(Y_21)) + +# For Y_22 with all X variables +expect_silent( + y22_mi_corr_all <- nonprob( + outcome = as.formula(paste('Y_22', X_formula)), + data = sample_B1, + pop_totals = X_totals, + method_outcome = "glm", + family_outcome = "binomial" + ) +) + +expect_equal(y22_mi_corr_all$output$mean, 0.7836929, tolerance = 0.0001) +expect_equal(y22_mi_corr_all$output$SE, 0.01921207, tolerance = 0.0001) +# TODO +# expect_true(y22_mi_corr_all$confidence_interval$lower_bound < mean(Y_22) & +# y22_mi_corr_all$confidence_interval$upper_bound > mean(Y_22)) + +##### All target variables ----------------------------------------------------------------- + +#### Variable Selection -------------------------------------------------------------------- + +##### One target variable ------------------------------------------------------------------ + +# For Y_11 with SCAD penalty +expect_silent( + y11_mi_scad <- nonprob( + outcome = Y_11 ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10, + data = sample_B1, + pop_totals = X_totals[1:11], + method_outcome = "glm", + family_outcome = "gaussian", + control_outcome = controlOut(penalty = "SCAD", nfolds = 5), + control_inference = controlInf(vars_selection = TRUE) + ) +) + +expect_equal(y11_mi_scad$output$mean, 2.00172, tolerance = 0.0001) +expect_equal(y11_mi_scad$output$SE, 0.03371772, tolerance = 0.0001) +expect_true(y11_mi_scad$confidence_interval$lower_bound < mean(Y_11) & + y11_mi_scad$confidence_interval$upper_bound > mean(Y_11)) +expect_true(NROW(y11_mi_scad$outcome$coefficients) < 11) # Number of selected variables + +# For Y_12 with SCAD penalty +expect_silent( + y12_mi_scad <- nonprob( + outcome = Y_12 ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10, + data = sample_B1, + pop_totals = X_totals[1:11], + method_outcome = "glm", + family_outcome = "gaussian", + control_outcome = controlOut(penalty = "SCAD", nfolds = 5), + control_inference = controlInf(vars_selection = TRUE) + ) +) + +expect_equal(y12_mi_scad$output$mean, 7.48752, tolerance = 0.0001) 
+expect_equal(y12_mi_scad$output$SE, 0.2202497, tolerance = 0.0001) +# TODO +# expect_true(y12_mi_scad$confidence_interval$lower_bound < mean(Y_12) & +# y12_mi_scad$confidence_interval$upper_bound > mean(Y_12)) +expect_true(NROW(y12_mi_scad$outcome$coefficients) < 11) + +# For Y_21 with SCAD penalty +expect_silent( + y21_mi_scad <- nonprob( + outcome = Y_21 ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10, + data = sample_B1, + pop_totals = X_totals[1:11], + method_outcome = "glm", + family_outcome = "binomial", + control_outcome = controlOut(penalty = "SCAD", nfolds = 5), + control_inference = controlInf(vars_selection = TRUE) + ) +) + +expect_equal(y21_mi_scad$output$mean, 0.7158588, tolerance = 0.0001) +expect_equal(y21_mi_scad$output$SE, 0.0141177, tolerance = 0.0001) +# TODO +# expect_true(y21_mi_scad$confidence_interval$lower_bound < mean(Y_21) & +# y21_mi_scad$confidence_interval$upper_bound > mean(Y_21)) +expect_true(NROW(y21_mi_scad$outcome$coefficients) < 11) + +# For Y_22 with SCAD penalty +expect_silent( + y22_mi_scad <- nonprob( + outcome = Y_22 ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10, + data = sample_B1, + pop_totals = X_totals[1:11], + method_outcome = "glm", + family_outcome = "binomial", + control_outcome = controlOut(penalty = "SCAD", nfolds = 5), + control_inference = controlInf(vars_selection = TRUE) + ) +) + +expect_equal(y22_mi_scad$output$mean, 0.7870636, tolerance = 0.0001) +expect_equal(y22_mi_scad$output$SE, 0.01550724, tolerance = 0.0001) +# TODO +# expect_true(y22_mi_scad$confidence_interval$lower_bound < mean(Y_22) & +# y22_mi_scad$confidence_interval$upper_bound > mean(Y_22)) +expect_true(NROW(y22_mi_scad$outcome$coefficients) < 11) diff --git a/inst/tinytest/test-4-dr-totals.R b/inst/tinytest/test-4-dr-totals.R new file mode 100644 index 0000000..cee8879 --- /dev/null +++ b/inst/tinytest/test-4-dr-totals.R @@ -0,0 +1,269 @@ +# Load necessary libraries +library(sampling) +library(survey) + +# Generation of data ---------------------------------------------------------------------- +set.seed(2024) +source("test-1-generate-data.R") # Adjust the path as needed + +# Check Doubly Robust (DR) Estimator ------------------------------------------------------- + +## Linear case ----------------------------------------------------------------------------- + +#### Correctly specified variables --------------------------------------------------------- + +##### One target variable ------------------------------------------------------------------ + +# For Y_11 +expect_silent( + y11_dr_corr_one <- nonprob( + selection = ~ X1 + X2 + X3 + X4, + outcome = Y_11 ~ X1 + X2 + X3 + X4, + data = sample_B1, + pop_totals = X_totals[1:5], + method_selection = "logit", + method_outcome = "glm", + family_outcome = "gaussian" + ) +) + +expect_equal(y11_dr_corr_one$output$mean, 2.17757, tolerance = 0.0001) # Adjusted expected mean +expect_equal(y11_dr_corr_one$output$SE, 0.09009875, tolerance = 0.0001) # Adjusted expected SE +# TODO +# expect_true(y11_dr_corr_one$confidence_interval$lower_bound < mean(Y_11) & +# y11_dr_corr_one$confidence_interval$upper_bound > mean(Y_11)) # Confidence interval test + +# For Y_12 +expect_silent( + y12_dr_corr_one <- nonprob( + selection = ~ X1 + X2 + X3 + X4, + outcome = Y_12 ~ X1 + X2 + X3 + X4, + data = sample_B1, + pop_totals = X_totals[1:5], + method_selection = "logit", + method_outcome = "glm", + family_outcome = "gaussian" + ) +) + +expect_equal(y12_dr_corr_one$output$mean, 7.168049, tolerance = 0.0001) 
+expect_equal(y12_dr_corr_one$output$SE, 0.5737179, tolerance = 0.0001) +expect_true(y12_dr_corr_one$confidence_interval$lower_bound < mean(Y_12) & + y12_dr_corr_one$confidence_interval$upper_bound > mean(Y_12)) + +# For Y_21 (Binary outcome) +expect_silent( + y21_dr_corr_one <- nonprob( + selection = ~ X1 + X2 + X3 + X4, + outcome = Y_21 ~ X1 + X2 + X3 + X4, + data = sample_B1, + pop_totals = X_totals[1:5], + method_selection = "logit", + method_outcome = "glm", + family_outcome = "binomial" + ) +) + +expect_equal(y21_dr_corr_one$output$mean, 0.6969967, tolerance = 0.0001) +expect_equal(y21_dr_corr_one$output$SE, 0.02966577, tolerance = 0.0001) +expect_true(y21_dr_corr_one$confidence_interval$lower_bound < mean(Y_21) & + y21_dr_corr_one$confidence_interval$upper_bound > mean(Y_21)) + +# For Y_22 (Binary outcome) +expect_silent( + y22_dr_corr_one <- nonprob( + selection = ~ X1 + X2 + X3 + X4, + outcome = Y_22 ~ X1 + X2 + X3 + X4, + data = sample_B1, + pop_totals = X_totals[1:5], + method_selection = "logit", + method_outcome = "glm", + family_outcome = "binomial" + ) +) + +expect_equal(y22_dr_corr_one$output$mean, 0.62005, tolerance = 0.0001) +expect_equal(y22_dr_corr_one$output$SE, 0.05059186, tolerance = 0.0001) +expect_true(y22_dr_corr_one$confidence_interval$lower_bound < mean(Y_22) & + y22_dr_corr_one$confidence_interval$upper_bound > mean(Y_22)) + +##### All target variables ----------------------------------------------------------------- + +#### All X variables ----------------------------------------------------------------------- + +##### One target variable ------------------------------------------------------------------ + +# For Y_11 with all X variables +expect_silent( + y11_dr_corr_all <- nonprob( + selection = X_formula, + outcome = as.formula(paste('Y_11', X_formula)), + data = sample_B1, + pop_totals = X_totals, + method_selection = "logit", + method_outcome = "glm", + family_outcome = "gaussian" + ) +) + +expect_equal(y11_dr_corr_all$output$mean, 2.005841, tolerance = 0.0001) +expect_equal(y11_dr_corr_all$output$SE, 0.04629972, tolerance = 0.0001) +expect_true(y11_dr_corr_all$confidence_interval$lower_bound < mean(Y_11) & + y11_dr_corr_all$confidence_interval$upper_bound > mean(Y_11)) + +# For Y_12 with all X variables +expect_silent( + y12_dr_corr_all <- nonprob( + selection = X_formula, + outcome = as.formula(paste('Y_12', X_formula)), + data = sample_B1, + pop_totals = X_totals, + method_selection = "logit", + method_outcome = "glm", + family_outcome = "gaussian" + ) +) + +expect_equal(y12_dr_corr_all$output$mean, 6.681308, tolerance = 0.0001) +expect_equal(y12_dr_corr_all$output$SE, 0.3886612, tolerance = 0.0001) +expect_true(y12_dr_corr_all$confidence_interval$lower_bound < mean(Y_12) & + y12_dr_corr_all$confidence_interval$upper_bound > mean(Y_12)) + +# For Y_21 with all X variables +expect_silent( + y21_dr_corr_all <- nonprob( + selection = X_formula, + outcome = as.formula(paste('Y_21', X_formula)), + data = sample_B1, + pop_totals = X_totals, + method_selection = "logit", + method_outcome = "glm", + family_outcome = "binomial" + ) +) + +expect_equal(y21_dr_corr_all$output$mean, 0.7233015, tolerance = 0.0001) +expect_equal(y21_dr_corr_all$output$SE, 0.0192478, tolerance = 0.0001) +# TODO +# expect_true(y21_dr_corr_all$confidence_interval$lower_bound < mean(Y_21) & +# y21_dr_corr_all$confidence_interval$upper_bound > mean(Y_21)) + +# For Y_22 with all X variables +expect_silent( + y22_dr_corr_all <- nonprob( + selection = X_formula, + outcome = 
as.formula(paste('Y_22', X_formula)), + data = sample_B1, + pop_totals = X_totals, + method_selection = "logit", + method_outcome = "glm", + family_outcome = "binomial" + ) +) + +expect_equal(y22_dr_corr_all$output$mean, 0.7668045, tolerance = 0.0001) +expect_equal(y22_dr_corr_all$output$SE, 0.02748494, tolerance = 0.0001) +# TODO +# expect_true(y22_dr_corr_all$confidence_interval$lower_bound < mean(Y_22) & +# y22_dr_corr_all$confidence_interval$upper_bound > mean(Y_22)) + +##### All target variables ----------------------------------------------------------------- + + +#### Variable Selection -------------------------------------------------------------------- + +##### One target variable ------------------------------------------------------------------ + +# For Y_11 with SCAD penalty +expect_silent( + y11_dr_scad <- nonprob( + selection = ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10, + outcome = Y_11 ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10, + data = sample_B1, + pop_totals = X_totals[1:11], + method_selection = "logit", + method_outcome = "glm", + family_outcome = "gaussian", + control_selection = controlSel(penalty = "SCAD", nfolds = 5), + control_outcome = controlOut(penalty = "SCAD", nfolds = 5), + control_inference = controlInf(vars_selection = TRUE) + ) +) + +expect_equal(y11_dr_scad$output$mean, 1.975596, tolerance = 0.0001) +expect_equal(y11_dr_scad$output$SE, 0.03277463, tolerance = 0.0001) +expect_true(y11_dr_scad$confidence_interval$lower_bound < mean(Y_11) & + y11_dr_scad$confidence_interval$upper_bound > mean(Y_11)) +expect_true(NROW(y11_dr_scad$selection$coefficients) < 11) +expect_true(NROW(y11_dr_scad$outcome$coefficients) < 11) + +# For Y_12 with SCAD penalty +expect_silent( + y12_dr_scad <- nonprob( + selection = ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10, + outcome = Y_12 ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10, + data = sample_B1, + pop_totals = X_totals[1:11], + method_selection = "logit", + method_outcome = "glm", + family_outcome = "gaussian", + control_selection = controlSel(penalty = "SCAD", nfolds = 5), + control_outcome = controlOut(penalty = "SCAD", nfolds = 5), + control_inference = controlInf(vars_selection = TRUE) + ) +) + +expect_equal(y12_dr_scad$output$mean, 6.730679, tolerance = 0.0001) +expect_equal(y12_dr_scad$output$SE, 0.5256198, tolerance = 0.0001) +expect_true(y12_dr_scad$confidence_interval$lower_bound < mean(Y_12) & + y12_dr_scad$confidence_interval$upper_bound > mean(Y_12)) +expect_true(NROW(y12_dr_scad$selection$coefficients) < 11) +expect_true(NROW(y12_dr_scad$outcome$coefficients) < 11) + +# For Y_21 with SCAD penalty +expect_silent( + y21_dr_scad <- nonprob( + selection = ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10, + outcome = Y_21 ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10, + data = sample_B1, + pop_totals = X_totals[1:11], + method_selection = "logit", + method_outcome = "glm", + family_outcome = "binomial", + control_selection = controlSel(penalty = "SCAD", nfolds = 5), + control_outcome = controlOut(penalty = "SCAD", nfolds = 5), + control_inference = controlInf(vars_selection = TRUE) + ) +) + +expect_equal(y21_dr_scad$output$mean, 0.7286048, tolerance = 0.0001) +expect_equal(y21_dr_scad$output$SE, 0.01451463, tolerance = 0.0001) +# TODO +# expect_true(y21_dr_scad$confidence_interval$lower_bound < mean(Y_21) & +# y21_dr_scad$confidence_interval$upper_bound > mean(Y_21)) +expect_true(NROW(y21_dr_scad$selection$coefficients) < 11) +expect_true(NROW(y21_dr_scad$outcome$coefficients) < 11) 
+ +# For Y_22 with SCAD penalty +expect_silent( + y22_dr_scad <- nonprob( + selection = ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10, + outcome = Y_22 ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10, + data = sample_B1, + pop_totals = X_totals[1:11], + method_selection = "logit", + method_outcome = "glm", + family_outcome = "binomial", + control_selection = controlSel(penalty = "SCAD", nfolds = 5), + control_outcome = controlOut(penalty = "SCAD", nfolds = 5), + control_inference = controlInf(vars_selection = TRUE) + ) +) + +expect_equal(y22_dr_scad$output$mean, 0.7644814, tolerance = 0.0001) +expect_equal(y22_dr_scad$output$SE, 0.01492712, tolerance = 0.0001) +# TODO +# expect_true(y22_dr_scad$confidence_interval$lower_bound < mean(Y_22) & +# y22_dr_scad$confidence_interval$upper_bound > mean(Y_22)) +expect_true(NROW(y22_dr_scad$selection$coefficients) < 11) +expect_true(NROW(y22_dr_scad$outcome$coefficients) < 11) diff --git a/inst/tinytest/test_nonprobsvy.R b/inst/tinytest/test_nonprobsvy.R index 50d80b2..1f2b85c 100644 --- a/inst/tinytest/test_nonprobsvy.R +++ b/inst/tinytest/test_nonprobsvy.R @@ -252,7 +252,7 @@ expect_true( # These tests are only supposed to be run on developer's machine and # package GitHub page not on CRAN (they take too long) -if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { +# if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { expect_silent( test1a_bootstrap <- nonprob(selection = ~ x, @@ -260,8 +260,7 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true") data = source_nonprob_p, method_selection = "logit", svydesign = svy_a, - control_inference = controlInf(var_method = "bootstrap", cores = 1), - verbose = TRUE) + control_inference = controlInf(var_method = "bootstrap", cores = 1)) ) @@ -271,8 +270,7 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true") data = source_nonprob_p, method_selection = "logit", svydesign = svy_a, - control_inference = controlInf(var_method = "bootstrap", cores = 1), - verbose = TRUE) + control_inference = controlInf(var_method = "bootstrap", cores = 1)) ) @@ -280,11 +278,59 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true") test3a_bootstrap <- nonprob(outcome = y1 ~ x, data = source_nonprob_p, svydesign = svy_a, + control_inference = controlInf(var_method = "bootstrap", cores = 1)) + ) + + expect_silent( + test2a_bootstrap_nn <- nonprob(selection = ~ x, + outcome = y1 ~ x, + data = source_nonprob_p, + method_selection = "logit", + method_outcome = "nn", + svydesign = svy_a, + control_inference = controlInf(var_method = "bootstrap", cores = 1)) + ) + + + expect_silent( + test3a_bootstrap_nn <- nonprob(outcome = y1 ~ x, + data = source_nonprob_p, + method_outcome = "nn", + svydesign = svy_a, + control_inference = controlInf(var_method = "bootstrap", cores = 1)) + ) + + expect_silent( + test2a_bootstrap_pmm <- nonprob(selection = ~ x, + outcome = y1 ~ x, + data = source_nonprob_p, + method_selection = "logit", + method_outcome = "pmm", + svydesign = svy_a, + control_inference = controlInf(var_method = "bootstrap", cores = 1)) + ) + + + expect_silent( + test3a_bootstrap_pmm <- nonprob(outcome = y1 ~ x, + data = source_nonprob_p, + method_outcome = "pmm", + svydesign = svy_a, + control_inference = controlInf(var_method = "bootstrap", cores = 1), + control_outcome = controlOut(predictive_match = 1)) + ) + + expect_silent( + test3a_bootstrap_pmm_2 <- nonprob(outcome = y1 ~ x, 
+ data = source_nonprob_p, + method_outcome = "pmm", + svydesign = svy_a, control_inference = controlInf(var_method = "bootstrap", cores = 1), - verbose = TRUE) + control_outcome = controlOut(predictive_match = 2)) ) + expect_equivalent(test1a$output$mean, test1a_bootstrap$output$mean, tolerance = 0.1) # expect_equivalent(test1a$output$SE, test1a_bootstrap$output$SE, tolerance = 0.1) to check @@ -293,4 +339,4 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true") expect_equivalent(test3a$output$mean, test3a_bootstrap$output$mean, tolerance = 0.1) expect_equivalent(test3a$output$SE, test3a_bootstrap$output$SE, tolerance = 0.1) -} +# } diff --git a/inst/tinytest/test_nonprobsvy_main.R b/inst/tinytest/test_nonprobsvy_main.R index c326fd7..d4eb890 100644 --- a/inst/tinytest/test_nonprobsvy_main.R +++ b/inst/tinytest/test_nonprobsvy_main.R @@ -62,6 +62,63 @@ expect_true( (test_ipw_2$confidence_interval[1] < 0.621989) & (0.621989 < test_ipw_2$confidence_interval[2]) ) +# SCAD +expect_silent( + test_ipw_1_scad <- nonprob(selection = ~ klasa_pr + sek + zawod_kod2 + woj, + target = ~ jedna_zmiana , + data = cbop_df_long, + svydesign = popyt_svy, + method_selection = "logit", + control_inference = controlInf(vars_selection = TRUE), + control_selection = controlSel(penalty = "SCAD")) +) + +expect_equivalent(test_ipw_1_scad$output$mean, + 0.6219899, # TODO, + tolerance = 0.1) + +expect_true( + (test_ipw_1_scad$confidence_interval[1] < 0.6219899) & (0.6219899 < test_ipw_1_scad$confidence_interval[2]) +) + +# LASSO +expect_silent( + test_ipw_1_lasso <- nonprob(selection = ~ klasa_pr + sek + zawod_kod2 + woj, + target = ~ jedna_zmiana , + data = cbop_df_long, + svydesign = popyt_svy, + method_selection = "logit", + control_inference = controlInf(vars_selection = TRUE), + control_selection = controlSel(penalty = "lasso")) + ) + + expect_equivalent(test_ipw_1_lasso$output$mean, + 0.6191018, # TODO, + tolerance = 0.1) + + expect_true( + (test_ipw_1_lasso$confidence_interval[1] < 0.6191018) & (0.6191018 < test_ipw_1$confidence_interval[2]) + ) + +# MCP +expect_silent( + test_ipw_1_mcp <- nonprob(selection = ~ klasa_pr + sek + zawod_kod2 + woj, + target = ~ jedna_zmiana , + data = cbop_df_long, + svydesign = popyt_svy, + method_selection = "logit", + control_inference = controlInf(vars_selection = TRUE), + control_selection = controlSel(penalty = "MCP")) +) + +expect_equivalent(test_ipw_1_mcp$output$mean, + 0.621989, # TODO, + tolerance = 0.1) + +expect_true( + (test_ipw_1_mcp$confidence_interval[1] < 0.6191018) & (0.6191018 < test_ipw_1$confidence_interval[2]) +) + # DR expect_silent( test_dr_1 <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, @@ -95,7 +152,117 @@ expect_true( (0.6747754 < test_dr_2$confidence_interval[2]) ) +# PMM +test_dr_1_pmm <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + selection = ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + method_outcome = "pmm") + +expect_equivalent(test_dr_1_pmm$output$mean, + 0.6490007, + tolerance = 0.1) + +expect_true( + (test_dr_1_pmm$confidence_interval[1] < 0.6490007) & + (0.6490007< test_dr_1_pmm$confidence_interval[2]) +) +# NN +test_dr_1_nn <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + selection = ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + method_outcome = "nn") + +expect_equivalent(test_dr_1_nn$output$mean, + 0.6003411, + tolerance = 0.1) + +expect_true( + 
(test_dr_1_nn$confidence_interval[1] < 0.6003411) & + (0.6003411< test_dr_1_nn$confidence_interval[2]) +) + +# bias minimization +expect_silent( + test_dr_1_bm <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + selection = ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + control_inference = controlInf(bias_correction = TRUE)) +) + +expect_equivalent(test_dr_1_bm$output$mean, + 0.6064716, + tolerance = 0.1) + +expect_true( + (test_dr_1_bm$confidence_interval[1] < 0.6064716) & + (0.6064716 < test_dr_1_bm$confidence_interval[2]) +) + +# SCAD +expect_silent( + test_dr_1_scad <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + selection = ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + control_inference = controlInf(vars_selection = TRUE), + control_selection = controlSel(penalty = "SCAD"), + control_outcome = controlOut(penalty = "SCAD")) +) + +expect_equivalent(test_dr_1_scad$output$mean, + 0.604803, + tolerance = 0.1) + +expect_true( + (test_dr_1_scad$confidence_interval[1] < 0.604803) & + (0.604803 < test_dr_1_scad$confidence_interval[2]) +) + +# LASSO +expect_silent( + test_dr_1_lasso <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + selection = ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + control_inference = controlInf(vars_selection = TRUE), + control_selection = controlSel(penalty = "lasso"), + control_outcome = controlOut(penalty = "lasso")) +) + +expect_equivalent(test_dr_1_lasso$output$mean, + 0.604803, + tolerance = 0.1) + +expect_true( + (test_dr_1_lasso$confidence_interval[1] < 0.604803) & + (0.604803 < test_dr_1_lasso$confidence_interval[2]) +) + +# MCP +expect_silent( + test_dr_1_mcp <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + selection = ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + control_inference = controlInf(vars_selection = TRUE), + control_selection = controlSel(penalty = "MCP"), + control_outcome = controlOut(penalty = "MCP")) +) + +expect_equivalent(test_dr_1_mcp$output$mean, + 0.604803, + tolerance = 0.1) + +expect_true( + (test_dr_1_mcp$confidence_interval[1] < 0.604803) & + (0.604803< test_dr_1_mcp$confidence_interval[2]) +) + # MI +# GLM test_mi_1 <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, data = cbop_df_long, svydesign = popyt_svy, @@ -116,3 +283,148 @@ expect_true( (test_mi_1$confidence_interval[1] < 0.6107926) & (0.6107926 < test_mi_1$confidence_interval[2]) ) + +#NN +test_mi_1_nn <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + method_outcome = "nn") + +expect_silent( + test_mi_1_nn <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + method_outcome = "nn") +) + +expect_equivalent(test_mi_1_nn$output$mean, + 0.6410426, + tolerance = 0.1) + +expect_true( + (test_mi_1_nn$confidence_interval[1] < 0.6410426) & + (0.6410426 < test_mi_1_nn$confidence_interval[2]) +) + +# PMM +test_mi_1_pmm <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + method_outcome = "pmm") + +expect_silent( + test_mi_1_pmm <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + method_outcome = "pmm") +) + 
+expect_equivalent(test_mi_1_pmm$output$mean, + 0.6490007, + tolerance = 0.1) + +expect_true( + (test_mi_1_pmm$confidence_interval[1] < 0.6490007) & + (0.6490007 < test_mi_1_pmm$confidence_interval[2]) +) + +test_mi_1_pmm_2 <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + method_outcome = "pmm", + control_outcome = controlOut(predictive_match = 2)) + +expect_silent( + test_mi_1_pmm_2 <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + method_outcome = "pmm", + control_outcome = controlOut(predictive_match = 2)) +) + +expect_equivalent(test_mi_1_pmm_2$output$mean, + 0.7553603, + tolerance = 0.1) + +expect_true( + (test_mi_1_pmm_2$confidence_interval[1] < 0.7553603) & + (0.7553603 < test_mi_1_pmm_2$confidence_interval[2]) +) + +# SCAD +test_mi_1_scad <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + method_outcome = "glm", + control_inference = controlInf(vars_selection = TRUE), + control_outcome = controlOut(penalty = "SCAD")) + +expect_silent( + test_mi_1_scad <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + method_outcome = "glm", + control_inference = controlInf(vars_selection = TRUE), + control_outcome = controlOut(penalty = "SCAD")) +) + +expect_equivalent(test_mi_1_scad$output$mean, + 0.6107926, + tolerance = 0.1) + +expect_true( + (test_mi_1_scad$confidence_interval[1] < 0.6107926) & + (0.6107926 < test_mi_1_scad$confidence_interval[2]) +) + +# LASSO +test_mi_1_lasso <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + method_outcome = "glm", + control_inference = controlInf(vars_selection = TRUE), + control_outcome = controlOut(penalty = "lasso")) + +expect_silent( + test_mi_1_lasso <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + method_outcome = "glm", + control_inference = controlInf(vars_selection = TRUE), + control_outcome = controlOut(penalty = "lasso")) +) + +expect_equivalent(test_mi_1_lasso$output$mean, + 0.6107926, + tolerance = 0.1) + +expect_true( + (test_mi_1_lasso$confidence_interval[1] < 0.6107926) & + (0.6107926 < test_mi_1_lasso$confidence_interval[2]) +) + +# MCP +test_mi_1_mcp <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + method_outcome = "glm", + control_inference = controlInf(vars_selection = TRUE), + control_outcome = controlOut(penalty = "MCP")) + +expect_silent( + test_mi_1_mcp <- nonprob(outcome = jedna_zmiana ~ klasa_pr + sek + zawod_kod2 + woj, + data = cbop_df_long, + svydesign = popyt_svy, + method_outcome = "glm", + control_inference = controlInf(vars_selection = TRUE), + control_outcome = controlOut(penalty = "MCP")) +) + +expect_equivalent(test_mi_1_mcp$output$mean, + 0.6107926, + tolerance = 0.1) + +expect_true( + (test_mi_1_mcp$confidence_interval[1] < 0.6107926) & + (0.6107926 < test_mi_1_mcp$confidence_interval[2]) +) diff --git a/man/median.nonprobsvy.Rd b/man/median.nonprobsvy.Rd deleted file mode 100644 index 653ef0b..0000000 --- a/man/median.nonprobsvy.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/simple_methods.R -\name{median.nonprobsvy} -\alias{median.nonprobsvy} -\title{Median values 
of covariates in subgroups}
-\usage{
-\method{median}{nonprobsvy}(x, nonprob)
-}
-\arguments{
-\item{x}{\itemize{
-\item formula
-}}
-
-\item{nonprob}{\itemize{
-\item object of nonprobsvy class
-}}
-}
-\value{
-A vector of estimated medians of the given covariates in subgroups
-}
-\description{
-Median values of covariates in subgroups
-}
diff --git a/man/quantile.nonprobsvy.Rd b/man/quantile.nonprobsvy.Rd
deleted file mode 100644
index 5f23775..0000000
--- a/man/quantile.nonprobsvy.Rd
+++ /dev/null
@@ -1,27 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/simple_methods.R
-\name{quantile.nonprobsvy}
-\alias{quantile.nonprobsvy}
-\title{Quantile values of covariates in subgroups}
-\usage{
-\method{quantile}{nonprobsvy}(x, nonprob, probs = 0.5)
-}
-\arguments{
-\item{x}{\itemize{
-\item formula
-}}
-
-\item{nonprob}{\itemize{
-\item object of nonprobsvy class
-}}
-
-\item{probs}{\itemize{
-\item probability
-}}
-}
-\value{
-A vector of estimated quantiles if the given covariates in subgroups
-}
-\description{
-Quantile values of covariates in subgroups
-}
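
Illustrative addendum, not part of the patch: the main non-test change above is the rewrite of the internal ff() helper in R/internals.R. It now deparses the formula, checks that exactly one "~" is present, splits the left- and right-hand sides on "+", and rebuilds one formula per dependent variable with reformulate(), which also copes with formulas that deparse across several lines. The sketch below is a standalone, hypothetical copy of that logic (named ff_sketch so it is not mistaken for the package's unexported ff()), showing the intended behaviour on a two-outcome formula; the variable names are placeholders.

## Minimal sketch mirroring the reworked ff() helper (assumption: standalone
## function, not the one shipped inside nonprobsvy).
ff_sketch <- function(formula) {
  formula_string <- paste(deparse(formula), collapse = " ")
  formula_parts <- strsplit(formula_string, "~")[[1]]
  if (length(formula_parts) != 2) {
    stop("The formula must contain exactly one '~' operator.")
  }
  # split "Y_11 + Y_12" into c("Y_11", "Y_12"), and the rhs analogously
  dependent_vars <- strsplit(trimws(formula_parts[1]), "\\s*\\+\\s*")[[1]]
  independent_vars <- strsplit(trimws(formula_parts[2]), "\\s*\\+\\s*")[[1]]
  if (any(duplicated(dependent_vars))) {
    warning("Duplicate dependent variable names detected. They have been made unique.")
    dependent_vars <- unique(dependent_vars)
  }
  # one outcome formula per dependent variable, all sharing the same rhs
  outcome_formulas <- lapply(
    dependent_vars,
    function(dv) reformulate(termlabels = independent_vars, response = dv)
  )
  list(f = dependent_vars, outcomes = outcome_formulas, l = length(dependent_vars))
}

ff_sketch(Y_11 + Y_12 ~ X1 + X2)
## $f        : "Y_11" "Y_12"
## $outcomes : Y_11 ~ X1 + X2, Y_12 ~ X1 + X2
## $l        : 2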