Initial cleaning before going on CRAN

ncn-foreigners · Oct 27, 2024 · 5101e05 · 5101e05
1 parent f33fa1a
commit 5101e05
Show file tree

Hide file tree

Showing 11 changed files with 19 additions and 307 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -45,8 +45,7 @@ Imports:
     nleqslv,
     doParallel,
     foreach,
-    parallel,
-    formula.tools
+    parallel
 LinkingTo: 
     Rcpp,
     RcppArmadillo

diff --git a/NAMESPACE b/NAMESPACE
@@ -34,16 +34,13 @@ importFrom(Rcpp,sourceCpp)
 importFrom(doParallel,registerDoParallel)
 importFrom(foreach,"%dopar%")
 importFrom(foreach,foreach)
-importFrom(formula.tools,lhs.vars)
-importFrom(formula.tools,rhs.vars)
 importFrom(maxLik,maxLik)
 importFrom(ncvreg,cv.ncvreg)
 importFrom(nleqslv,nleqslv)
 importFrom(parallel,makeCluster)
 importFrom(parallel,stopCluster)
 importFrom(stats,AIC)
 importFrom(stats,BIC)
-importFrom(stats,aggregate)
 importFrom(stats,as.formula)
 importFrom(stats,binomial)
 importFrom(stats,coef)

diff --git a/NEWS.md b/NEWS.md
@@ -8,7 +8,6 @@
     -   add information to `summary` about quality of estimation basing on difference between estimated and known total values of auxiliary variables
     -   add estimation of exact standard error for k-nearest neighbor estimator.
     -   add breaking change to `controlOut` function by switching values for `predictive_match` argument. From now on, the `predictive_match = 1` means $\hat{y}-\hat{y}$ in predictive mean matching imputation and `predictive_match = 2` corresponds to $\hat{y}-y$ matching.
-    - add methods for estimation of means and totals in subsets/groups with its variances.
     - implement `div` option when variable selection (more in documentation) for doubly robust estimation.
 
 ## nonprobsvy 0.1.0

diff --git a/R/simple_methods.R b/R/simple_methods.R
@@ -321,236 +321,3 @@ deviance.nonprobsvy <- function(object,
   if (class(object)[2] == "nonprobsvy_dr") res <- c("selection" = res_sel, "outcome" = res_out)
   res
 }
-#' @title Total values of covariates in subgroups
-#'
-#' @param x - formula
-#' @param nonprob - object of nonprobsvy class
-#'
-#' @method total nonprobsvy
-#' @return A vector of estimated totals of the given covariates in subgroups
-#' @importFrom formula.tools lhs.vars
-#' @importFrom formula.tools rhs.vars
-#' @importFrom stats aggregate
-total.nonprobsvy <- function(x, nonprob) {
-  variables <- lhs.vars(x)
-  groups <- rhs.vars(x)
-  # TODO variance implementation
-  if (is.null(variables)) {
-    data <- nonprob$data[which(nonprob$R == 1), groups, drop = FALSE]
-    total <- tapply(nonprob$weights[which(nonprob$R == 1)], data, sum)
-  } else {
-    data <- nonprob$data[which(nonprob$R == 1), c(variables, groups)]
-    weights <- nonprob$weights[which(nonprob$R == 1)]
-
-    total <- aggregate(data[, variables] * weights, by = data[, groups, drop = FALSE], sum)
-  }
-  total
-}
-#' @title Mean values of covariates in subgroups
-#'
-#' @param x - formula
-#' @param nonprob - object of nonprobsvy class
-#'
-#' @method mean nonprobsvy
-#' @return A vector of estimated means of the given covariates in subgroups
-#' @importFrom formula.tools lhs.vars
-#' @importFrom formula.tools rhs.vars
-#' @importFrom stats aggregate
-mean.nonprobsvy <- function(x, nonprob) {
-  # TODO variance for MI
-  # TODO all implementation for DR
-  variables <- lhs.vars(x)
-  groups <- rhs.vars(x)
-  class_nonprob <- class(nonprob)[2]
-  if (!is.null(variables) & class_nonprob != 'nonprobsvy_ipw')
-    stop("DR or MI estimators only for y variable in subgroups. Recommended formula definition
-         is something like ~ x + y where x and y are grouping variables")
-  if (is.null(variables)) {
-    # data <- nonprob$data[, groups, drop = FALSE]
-    data <- model.matrix(as.formula(paste(x, "- 1")), data = nonprob$data)
-    if (class_nonprob == "nonprobsvy_ipw") {
-      mean_value <- sapply(as.data.frame(data), function(col) weighted.mean(col, nonprob$weights))
-      ys <- data
-    } else {
-      # MI TODO for more than one y
-      if (class_nonprob == "nonprobsvy_mi") {
-        data_nonprob <- split(cbind(nonprob$data, nonprob$outcome[[1]]$fitted.values), x)
-        data_prob <- split(cbind(nonprob$svydesign$variables, nonprob$svydesign$prob), x)
-
-        # X_nons <- model.matrix(delete.response(terms(nonprob$outcome[[1]]$formula)), data_nonprob)
-        # X_rand <- model.matrix(delete.response(terms(nonprob$outcome[[1]]$formula)), data_prob)
-
-        mean_value <- aggregate(formula(paste("y_hat_MI", x)),
-                                data = nonprob$svydesign$variables,
-                                FUN = function(y, w) weighted.mean(y, w = w[1:length(y)]),
-                                w = 1 / nonprob$svydesign$prob)
-
-      } else if (class_nonprob == "nonprobsvy_dr") {
-        mean_value <- list()
-        for (i in 1:p) {
-          mean_value[[i]] <- mu_hatDR(
-            y = ,
-            y_nons = ,
-            y_rand = ,
-            weights = , # TODO
-            weights_nons = ,
-            weights_rand = ,
-            N_nons = ,
-            N_rand = )
-        }
-      }
-    }
-  } else {
-    data <- nonprob$data[, c(variables, groups)]
-    weights <- nonprob$weights
-    if (class_nonprob == "nonprobsvy_ipw") {
-      mean_value <- aggregate(data[, variables], by = data[, groups, drop = FALSE],
-                              FUN = function(y, w) weighted.mean(y, w = w[1:length(y)]),
-                              w = weights)
-    }
-  }
-  p <- length(mean_value)
-  variances <- numeric(length = p)
-
-  for (i in 1:p) {
-    if (class_nonprob == "nonprobsvy_ipw") {
-      yy <- ys[,i]
-      mu <- mean_value[i]
-      var <- internal_varIPW(svydesign = nonprob$svydesign,
-                             X_nons = nonprob$X[nonprob$R == 1, ],
-                             X_rand = nonprob$X[nonprob$R == 0, ],
-                             y_nons = yy,
-                             weights = nonprob$selection$prior.weights,
-                             ps_nons = nonprob$prob[nonprob$R == 1],
-                             mu_hat = mu,
-                             hess = nonprob$selection$hessian,
-                             ps_nons_der = as.vector(nonprob$selection$prob_der),
-                             N = nonprob$pop_size,
-                             est_ps_rand = as.vector(nonprob$selection$prob_rand_est), # TODO
-                             ps_rand = nonprob$svydesign$prob_rand,
-                             est_ps_rand_der = as.vector(nonprob$selection$prob_rand_est_der), # TODO
-                             n_rand = nonprob$prob_size,
-                             pop_size = nonprob$pop_size,
-                             pop_totals = nonprob$selection$pop_totals,
-                             method_selection = nonprob$selection$method_selection,
-                             est_method = nonprob$selection$method,
-                             theta = nonprob$selection$coefficients,
-                             h = nonprob$selection$h_function,
-                             verbose = FALSE,
-                             var_cov1 = nonprob$selection$link$variance_covariance1,
-                             var_cov2 = nonprob$selection$link$variance_covariance2
-                               )
-    } else if (nonprob_class == "nonprobsvy_mi") {
-
-      var <- internal_varMI(svydesign = nonprob$svydesign,
-                            X_nons = nonprob$X[nonprob$R == 1, ], # subset by x formula
-                            X_rand = nonprob$X[nonprob$R == 0, ], # subset by x formula
-                            y = ys_nons[[i]]$single_shift, # TODO y_nons
-                            y_pred = ys_pred_nons[[i]]$single_shift, # TODO
-                            weights_rand = 1 / nonprob$svydesig$prob, # subset by x formula
-                            method = method_outcome,
-                            n_rand = nonprob$prob_size,
-                            n_nons = nonprob$nonprob_size,
-                            N = nonprob$pop_size,
-                            family = nonprob$outcome$family,
-                            model_obj = model_objects, # TODO
-                            pop_totals = nonprob$pop_totals,
-                            # we should probably just pass full control list
-                            k = nonprob$control$control_outcome$k,
-                            predictive_match = nonprob$control$control_outcome$predictive_match,
-                            nn_exact_se = nonprob$control$control_inference$nn_exact_se,
-                            pmm_reg_engine = nonprob$control$control_outcome$pmm_reg_engine,
-                            pi_ij = nonprob$control$control_inference$pi_ij
-                          )
-    } else if (nonprob_class == "nonprobsvy_dr") {
-      var <- internal_varDR(OutcomeModel = OutcomeModel,
-                            SelectionModel = SelectionModel,
-                            y_nons_pred = ys_nons_pred,
-                            weights = 1,
-                            weights_rand = 1 / nonprob$svydesign$prob,
-                            method_selection = method_selection,
-                            control_selection = nonprob$control$control_selection,
-                            ps_nons = nonprob$prob,
-                            theta = nonprob$selection$coefficients,
-                            hess = nonprob$selection$hessian,
-                            ps_nons_der = nonprob$selection$ps_nons_der,
-                            est_ps_rand = nonprob$selection$prob_rand_est,
-                            y_rand_pred = ys_pred,
-                            N_nons = nonprob$pop_size,
-                            est_ps_rand_der = nonprob$selection$prob_rand_est_der,
-                            svydesign = nonprob$svydesign,
-                            est_method = nonprob$selection$method,
-                            h = nonprob$selection$h_function,
-                            pop_totals = nonprob$pop_totals,
-                            sigma = nonprob$outcome$sigma, # TODO
-                            bias_correction = nonprob$control$control_inference$bias_corr,
-                            verbose = FALSE
-      )
-    }
-    variances[i] <- var$var
-  }
-  res <- data.frame(mean = mean_value,
-                    se = sqrt(variances)) # perhaps convert to data.frame
-  res
-}
-# @title Median values of covariates in subgroups
-#
-# @param x - formula
-# @param nonprob - object of nonprobsvy class
-#
-# @method median nonprobsvy
-# @return A vector of estimated medians of the given covariates in subgroups
-# @importFrom formula.tools lhs.vars
-# @importFrom formula.tools rhs.vars
-# @importFrom spatstat weighted.median
-# @importFrom stats aggregate
-# @importFrom stats median
-# median.nonprobsvy <- function(x, nonprob) {
-#   variables <- lhs.vars(x)
-#   groups <- rhs.vars(x)
-#
-#   if (is.null(variables)) {
-#     data <- nonprob$data[which(nonprob$R == 1), groups, drop = FALSE]
-#     median_value <- tapply(nonprob$weights[which(nonprob$R == 1)], data, median)
-#   } else {
-#
-#     data <- nonprob$data[which(nonprob$R == 1), c(variables, groups)]
-#     weights <- nonprob$weights[which(nonprob$R == 1)]
-#
-#     median_value <- aggregate(data[, variables], by = data[, groups, drop = FALSE],
-#                               FUN = function(y, w) weighted.median(y, w = w[1:length(y)]),
-#                               w = weights)
-#   }
-#   median_value
-# }
-# @title Quantile values of covariates in subgroups
-#
-# @param x - formula
-# @param nonprob - object of nonprobsvy class
-# @param probs - probability
-#
-# @method quantile nonprobsvy
-# @return A vector of estimated quantiles if the given covariates in subgroups
-# @importFrom formula.tools lhs.vars
-# @importFrom formula.tools rhs.vars
-# @importFrom spatstat weighted.quantile
-# @importFrom stats aggregate
-# @importFrom stats quantile
-# quantile.nonprobsvy <- function(x, nonprob, probs = 0.5) {
-#   variables <- lhs.vars(x)
-#   groups <- rhs.vars(x)
-#
-#   if (is.null(variables)) {
-#     data <- nonprob$data[which(nonprob$R == 1), groups, drop = FALSE]
-#     percentile_value <- tapply(nonprob$weights[which(nonprob$R == 1)], data, function(y) quantile(y, probs = probs))
-#   } else {
-#
-#     data <- nonprob$data[which(nonprob$R == 1), c(variables, groups)]
-#     weights <- nonprob$weights[which(nonprob$R == 1)]
-#
-#     percentile_value <- aggregate(data[, variables], by = data[, groups, drop = FALSE],
-#                                   FUN = function(y, w) weighted.quantile(y, w = w[1:length(y)], probs = probs),
-#                                   w = weights)
-#   }
-#   percentile_value
-# }
diff --git a/inst/tinytest/test-2-ipw-totals.R b/inst/tinytest/test-2-ipw-totals.R
@@ -1,5 +1,3 @@
-# if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) {
-
   library(sampling)
   library(survey)
 
@@ -505,7 +503,7 @@
   #### variable selection  ------------------------------------------------------------------
   ##### one target variable  ----------------------------------------------------------------
 
-  # if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) {
+  if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) {
 
     ## y_11
     expect_silent(
@@ -625,7 +623,7 @@
     #                             verbose = T)
     # )
 
-  # }
+  }
 
 
   # check probit ----------------------------------------------------------------------------
@@ -811,7 +809,7 @@
                               row.names = c("Y_11", "Y_12", "Y_21", "Y_22")))
 
 
-  # if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) {
+  if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) {
     #### variable selection  ------------------------------------------------------------------
     ##### one target variable  ----------------------------------------------------------------
 
@@ -933,7 +931,7 @@
     #                             verbose = T)
     # )
 
-  # }
+  }
 
   ## non-linear case ------------------------------------------------------------------------
   #### correctly specified variables --------------------------------------------------------
@@ -1119,7 +1117,7 @@
                               row.names = c("Y_11", "Y_12", "Y_21", "Y_22")))
 
 
-  # if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) {
+  if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) {
     #### variable selection  ------------------------------------------------------------------
     ##### one target variable  ----------------------------------------------------------------
 
@@ -1241,7 +1239,7 @@
     #                             verbose = T)
     # )
 
-  # }
+  }
 
   # check cloglog -----------------------------------------------------
   ## linear case ----------------------------------------------------------------------------
@@ -1752,4 +1750,3 @@
     # # expect_true(y22_corr_scad$confidence_interval$lower_bound < mean(Y_22) &
     # #               y22_corr_scad$confidence_interval$upper_bound > mean(Y_22)) ## conf int
     # expect_true(NROW(y22_corr_scad$selection$coefficients) == 2)
-# }
diff --git a/inst/tinytest/test-3-mi-totals.R b/inst/tinytest/test-3-mi-totals.R
@@ -43,7 +43,6 @@ expect_silent(
 
 expect_equal(y12_mi_corr_one$output$mean, 7.465879, tolerance = 0.0001)
 expect_equal(y12_mi_corr_one$output$SE, 0.2523682, tolerance = 0.0001)
-# TODO
 # expect_true(y12_mi_corr_one$confidence_interval$lower_bound < mean(Y_12) &
 #               y12_mi_corr_one$confidence_interval$upper_bound > mean(Y_12))
 
@@ -88,7 +87,7 @@ expect_true(y22_mi_corr_one$confidence_interval$lower_bound < mean(Y_22) &
 # For Y_11 with all X variables
 expect_silent(
   y11_mi_corr_all <- nonprob(
-    outcome = as.formula(paste('Y_11', X_formula)),
+    outcome = as.formula(paste('Y_11 ~ ', as.character(X_formula)[2])),
     data = sample_B1,  # Include all X variables
     pop_totals = X_totals,
     method_outcome = "glm",
@@ -104,7 +103,7 @@ expect_true(y11_mi_corr_all$confidence_interval$lower_bound < mean(Y_11) &
 # For Y_12 with all X variables
 expect_silent(
   y12_mi_corr_all <- nonprob(
-    outcome = as.formula(paste('Y_12', X_formula)),
+    outcome = as.formula(paste('Y_12 ~ ', as.character(X_formula)[2])),
     data = sample_B1,
     pop_totals = X_totals,
     method_outcome = "glm",
@@ -121,7 +120,7 @@ expect_equal(y12_mi_corr_all$output$SE, 0.2454465, tolerance = 0.0001)
 # For Y_21 with all X variables
 expect_silent(
   y21_mi_corr_all <- nonprob(
-    outcome = as.formula(paste('Y_21', X_formula)),
+    outcome = as.formula(paste('Y_21 ~ ', as.character(X_formula)[2])),
     data = sample_B1,
     pop_totals = X_totals,
     method_outcome = "glm",
@@ -138,7 +137,7 @@ expect_equal(y21_mi_corr_all$output$SE, 0.01992489, tolerance = 0.0001)
 # For Y_22 with all X variables
 expect_silent(
   y22_mi_corr_all <- nonprob(
-    outcome = as.formula(paste('Y_22', X_formula)),
+    outcome = as.formula(paste('Y_22 ~ ', as.character(X_formula)[2])),
     data = sample_B1,
     pop_totals = X_totals,
     method_outcome = "glm",