From 5101e0500ce75b069cbdbd38f92109866e8ff4e4 Mon Sep 17 00:00:00 2001 From: LukaszChrostowski Date: Sun, 27 Oct 2024 16:48:14 +0100 Subject: [PATCH] Initial cleaning before going on CRAN --- DESCRIPTION | 3 +- NAMESPACE | 3 - NEWS.md | 1 - R/simple_methods.R | 233 ------------------------------ inst/tinytest/test-2-ipw-totals.R | 15 +- inst/tinytest/test-3-mi-totals.R | 9 +- inst/tinytest/test-4-dr-totals.R | 8 +- inst/tinytest/test_nonprobsvy.R | 4 +- man/mean.nonprobsvy.Rd | 23 --- man/total.nonprobsvy.Rd | 23 --- src/Makevars | 4 +- 11 files changed, 19 insertions(+), 307 deletions(-) delete mode 100644 man/mean.nonprobsvy.Rd delete mode 100644 man/total.nonprobsvy.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 080ce4c..34b80db 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -45,8 +45,7 @@ Imports: nleqslv, doParallel, foreach, - parallel, - formula.tools + parallel LinkingTo: Rcpp, RcppArmadillo diff --git a/NAMESPACE b/NAMESPACE index 2ec47a4..6c7a33e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,8 +34,6 @@ importFrom(Rcpp,sourceCpp) importFrom(doParallel,registerDoParallel) importFrom(foreach,"%dopar%") importFrom(foreach,foreach) -importFrom(formula.tools,lhs.vars) -importFrom(formula.tools,rhs.vars) importFrom(maxLik,maxLik) importFrom(ncvreg,cv.ncvreg) importFrom(nleqslv,nleqslv) @@ -43,7 +41,6 @@ importFrom(parallel,makeCluster) importFrom(parallel,stopCluster) importFrom(stats,AIC) importFrom(stats,BIC) -importFrom(stats,aggregate) importFrom(stats,as.formula) importFrom(stats,binomial) importFrom(stats,coef) diff --git a/NEWS.md b/NEWS.md index fbb5c38..c11f086 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,7 +8,6 @@ - add information to `summary` about quality of estimation basing on difference between estimated and known total values of auxiliary variables - add estimation of exact standard error for k-nearest neighbor estimator. - add breaking change to `controlOut` function by switching values for `predictive_match` argument. From now on, the `predictive_match = 1` means $\hat{y}-\hat{y}$ in predictive mean matching imputation and `predictive_match = 2` corresponds to $\hat{y}-y$ matching. - - add methods for estimation of means and totals in subsets/groups with its variances. - implement `div` option when variable selection (more in documentation) for doubly robust estimation. ## nonprobsvy 0.1.0 diff --git a/R/simple_methods.R b/R/simple_methods.R index 6ae4aab..983fb1d 100644 --- a/R/simple_methods.R +++ b/R/simple_methods.R @@ -321,236 +321,3 @@ deviance.nonprobsvy <- function(object, if (class(object)[2] == "nonprobsvy_dr") res <- c("selection" = res_sel, "outcome" = res_out) res } -#' @title Total values of covariates in subgroups -#' -#' @param x - formula -#' @param nonprob - object of nonprobsvy class -#' -#' @method total nonprobsvy -#' @return A vector of estimated totals of the given covariates in subgroups -#' @importFrom formula.tools lhs.vars -#' @importFrom formula.tools rhs.vars -#' @importFrom stats aggregate -total.nonprobsvy <- function(x, nonprob) { - variables <- lhs.vars(x) - groups <- rhs.vars(x) - # TODO variance implementation - if (is.null(variables)) { - data <- nonprob$data[which(nonprob$R == 1), groups, drop = FALSE] - total <- tapply(nonprob$weights[which(nonprob$R == 1)], data, sum) - } else { - data <- nonprob$data[which(nonprob$R == 1), c(variables, groups)] - weights <- nonprob$weights[which(nonprob$R == 1)] - - total <- aggregate(data[, variables] * weights, by = data[, groups, drop = FALSE], sum) - } - total -} -#' @title Mean values of covariates in subgroups -#' -#' @param x - formula -#' @param nonprob - object of nonprobsvy class -#' -#' @method mean nonprobsvy -#' @return A vector of estimated means of the given covariates in subgroups -#' @importFrom formula.tools lhs.vars -#' @importFrom formula.tools rhs.vars -#' @importFrom stats aggregate -mean.nonprobsvy <- function(x, nonprob) { - # TODO variance for MI - # TODO all implementation for DR - variables <- lhs.vars(x) - groups <- rhs.vars(x) - class_nonprob <- class(nonprob)[2] - if (!is.null(variables) & class_nonprob != 'nonprobsvy_ipw') - stop("DR or MI estimators only for y variable in subgroups. Recommended formula definition - is something like ~ x + y where x and y are grouping variables") - if (is.null(variables)) { - # data <- nonprob$data[, groups, drop = FALSE] - data <- model.matrix(as.formula(paste(x, "- 1")), data = nonprob$data) - if (class_nonprob == "nonprobsvy_ipw") { - mean_value <- sapply(as.data.frame(data), function(col) weighted.mean(col, nonprob$weights)) - ys <- data - } else { - # MI TODO for more than one y - if (class_nonprob == "nonprobsvy_mi") { - data_nonprob <- split(cbind(nonprob$data, nonprob$outcome[[1]]$fitted.values), x) - data_prob <- split(cbind(nonprob$svydesign$variables, nonprob$svydesign$prob), x) - - # X_nons <- model.matrix(delete.response(terms(nonprob$outcome[[1]]$formula)), data_nonprob) - # X_rand <- model.matrix(delete.response(terms(nonprob$outcome[[1]]$formula)), data_prob) - - mean_value <- aggregate(formula(paste("y_hat_MI", x)), - data = nonprob$svydesign$variables, - FUN = function(y, w) weighted.mean(y, w = w[1:length(y)]), - w = 1 / nonprob$svydesign$prob) - - } else if (class_nonprob == "nonprobsvy_dr") { - mean_value <- list() - for (i in 1:p) { - mean_value[[i]] <- mu_hatDR( - y = , - y_nons = , - y_rand = , - weights = , # TODO - weights_nons = , - weights_rand = , - N_nons = , - N_rand = ) - } - } - } - } else { - data <- nonprob$data[, c(variables, groups)] - weights <- nonprob$weights - if (class_nonprob == "nonprobsvy_ipw") { - mean_value <- aggregate(data[, variables], by = data[, groups, drop = FALSE], - FUN = function(y, w) weighted.mean(y, w = w[1:length(y)]), - w = weights) - } - } - p <- length(mean_value) - variances <- numeric(length = p) - - for (i in 1:p) { - if (class_nonprob == "nonprobsvy_ipw") { - yy <- ys[,i] - mu <- mean_value[i] - var <- internal_varIPW(svydesign = nonprob$svydesign, - X_nons = nonprob$X[nonprob$R == 1, ], - X_rand = nonprob$X[nonprob$R == 0, ], - y_nons = yy, - weights = nonprob$selection$prior.weights, - ps_nons = nonprob$prob[nonprob$R == 1], - mu_hat = mu, - hess = nonprob$selection$hessian, - ps_nons_der = as.vector(nonprob$selection$prob_der), - N = nonprob$pop_size, - est_ps_rand = as.vector(nonprob$selection$prob_rand_est), # TODO - ps_rand = nonprob$svydesign$prob_rand, - est_ps_rand_der = as.vector(nonprob$selection$prob_rand_est_der), # TODO - n_rand = nonprob$prob_size, - pop_size = nonprob$pop_size, - pop_totals = nonprob$selection$pop_totals, - method_selection = nonprob$selection$method_selection, - est_method = nonprob$selection$method, - theta = nonprob$selection$coefficients, - h = nonprob$selection$h_function, - verbose = FALSE, - var_cov1 = nonprob$selection$link$variance_covariance1, - var_cov2 = nonprob$selection$link$variance_covariance2 - ) - } else if (nonprob_class == "nonprobsvy_mi") { - - var <- internal_varMI(svydesign = nonprob$svydesign, - X_nons = nonprob$X[nonprob$R == 1, ], # subset by x formula - X_rand = nonprob$X[nonprob$R == 0, ], # subset by x formula - y = ys_nons[[i]]$single_shift, # TODO y_nons - y_pred = ys_pred_nons[[i]]$single_shift, # TODO - weights_rand = 1 / nonprob$svydesig$prob, # subset by x formula - method = method_outcome, - n_rand = nonprob$prob_size, - n_nons = nonprob$nonprob_size, - N = nonprob$pop_size, - family = nonprob$outcome$family, - model_obj = model_objects, # TODO - pop_totals = nonprob$pop_totals, - # we should probably just pass full control list - k = nonprob$control$control_outcome$k, - predictive_match = nonprob$control$control_outcome$predictive_match, - nn_exact_se = nonprob$control$control_inference$nn_exact_se, - pmm_reg_engine = nonprob$control$control_outcome$pmm_reg_engine, - pi_ij = nonprob$control$control_inference$pi_ij - ) - } else if (nonprob_class == "nonprobsvy_dr") { - var <- internal_varDR(OutcomeModel = OutcomeModel, - SelectionModel = SelectionModel, - y_nons_pred = ys_nons_pred, - weights = 1, - weights_rand = 1 / nonprob$svydesign$prob, - method_selection = method_selection, - control_selection = nonprob$control$control_selection, - ps_nons = nonprob$prob, - theta = nonprob$selection$coefficients, - hess = nonprob$selection$hessian, - ps_nons_der = nonprob$selection$ps_nons_der, - est_ps_rand = nonprob$selection$prob_rand_est, - y_rand_pred = ys_pred, - N_nons = nonprob$pop_size, - est_ps_rand_der = nonprob$selection$prob_rand_est_der, - svydesign = nonprob$svydesign, - est_method = nonprob$selection$method, - h = nonprob$selection$h_function, - pop_totals = nonprob$pop_totals, - sigma = nonprob$outcome$sigma, # TODO - bias_correction = nonprob$control$control_inference$bias_corr, - verbose = FALSE - ) - } - variances[i] <- var$var - } - res <- data.frame(mean = mean_value, - se = sqrt(variances)) # perhaps convert to data.frame - res -} -# @title Median values of covariates in subgroups -# -# @param x - formula -# @param nonprob - object of nonprobsvy class -# -# @method median nonprobsvy -# @return A vector of estimated medians of the given covariates in subgroups -# @importFrom formula.tools lhs.vars -# @importFrom formula.tools rhs.vars -# @importFrom spatstat weighted.median -# @importFrom stats aggregate -# @importFrom stats median -# median.nonprobsvy <- function(x, nonprob) { -# variables <- lhs.vars(x) -# groups <- rhs.vars(x) -# -# if (is.null(variables)) { -# data <- nonprob$data[which(nonprob$R == 1), groups, drop = FALSE] -# median_value <- tapply(nonprob$weights[which(nonprob$R == 1)], data, median) -# } else { -# -# data <- nonprob$data[which(nonprob$R == 1), c(variables, groups)] -# weights <- nonprob$weights[which(nonprob$R == 1)] -# -# median_value <- aggregate(data[, variables], by = data[, groups, drop = FALSE], -# FUN = function(y, w) weighted.median(y, w = w[1:length(y)]), -# w = weights) -# } -# median_value -# } -# @title Quantile values of covariates in subgroups -# -# @param x - formula -# @param nonprob - object of nonprobsvy class -# @param probs - probability -# -# @method quantile nonprobsvy -# @return A vector of estimated quantiles if the given covariates in subgroups -# @importFrom formula.tools lhs.vars -# @importFrom formula.tools rhs.vars -# @importFrom spatstat weighted.quantile -# @importFrom stats aggregate -# @importFrom stats quantile -# quantile.nonprobsvy <- function(x, nonprob, probs = 0.5) { -# variables <- lhs.vars(x) -# groups <- rhs.vars(x) -# -# if (is.null(variables)) { -# data <- nonprob$data[which(nonprob$R == 1), groups, drop = FALSE] -# percentile_value <- tapply(nonprob$weights[which(nonprob$R == 1)], data, function(y) quantile(y, probs = probs)) -# } else { -# -# data <- nonprob$data[which(nonprob$R == 1), c(variables, groups)] -# weights <- nonprob$weights[which(nonprob$R == 1)] -# -# percentile_value <- aggregate(data[, variables], by = data[, groups, drop = FALSE], -# FUN = function(y, w) weighted.quantile(y, w = w[1:length(y)], probs = probs), -# w = weights) -# } -# percentile_value -# } diff --git a/inst/tinytest/test-2-ipw-totals.R b/inst/tinytest/test-2-ipw-totals.R index 932802c..5a593e3 100644 --- a/inst/tinytest/test-2-ipw-totals.R +++ b/inst/tinytest/test-2-ipw-totals.R @@ -1,5 +1,3 @@ -# if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { - library(sampling) library(survey) @@ -505,7 +503,7 @@ #### variable selection ------------------------------------------------------------------ ##### one target variable ---------------------------------------------------------------- - # if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { + if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { ## y_11 expect_silent( @@ -625,7 +623,7 @@ # verbose = T) # ) - # } + } # check probit ---------------------------------------------------------------------------- @@ -811,7 +809,7 @@ row.names = c("Y_11", "Y_12", "Y_21", "Y_22"))) - # if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { + if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { #### variable selection ------------------------------------------------------------------ ##### one target variable ---------------------------------------------------------------- @@ -933,7 +931,7 @@ # verbose = T) # ) - # } + } ## non-linear case ------------------------------------------------------------------------ #### correctly specified variables -------------------------------------------------------- @@ -1119,7 +1117,7 @@ row.names = c("Y_11", "Y_12", "Y_21", "Y_22"))) - # if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { + if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { #### variable selection ------------------------------------------------------------------ ##### one target variable ---------------------------------------------------------------- @@ -1241,7 +1239,7 @@ # verbose = T) # ) - # } + } # check cloglog ----------------------------------------------------- ## linear case ---------------------------------------------------------------------------- @@ -1752,4 +1750,3 @@ # # expect_true(y22_corr_scad$confidence_interval$lower_bound < mean(Y_22) & # # y22_corr_scad$confidence_interval$upper_bound > mean(Y_22)) ## conf int # expect_true(NROW(y22_corr_scad$selection$coefficients) == 2) -# } diff --git a/inst/tinytest/test-3-mi-totals.R b/inst/tinytest/test-3-mi-totals.R index 2a2dcc7..f901356 100644 --- a/inst/tinytest/test-3-mi-totals.R +++ b/inst/tinytest/test-3-mi-totals.R @@ -43,7 +43,6 @@ expect_silent( expect_equal(y12_mi_corr_one$output$mean, 7.465879, tolerance = 0.0001) expect_equal(y12_mi_corr_one$output$SE, 0.2523682, tolerance = 0.0001) -# TODO # expect_true(y12_mi_corr_one$confidence_interval$lower_bound < mean(Y_12) & # y12_mi_corr_one$confidence_interval$upper_bound > mean(Y_12)) @@ -88,7 +87,7 @@ expect_true(y22_mi_corr_one$confidence_interval$lower_bound < mean(Y_22) & # For Y_11 with all X variables expect_silent( y11_mi_corr_all <- nonprob( - outcome = as.formula(paste('Y_11', X_formula)), + outcome = as.formula(paste('Y_11 ~ ', as.character(X_formula)[2])), data = sample_B1, # Include all X variables pop_totals = X_totals, method_outcome = "glm", @@ -104,7 +103,7 @@ expect_true(y11_mi_corr_all$confidence_interval$lower_bound < mean(Y_11) & # For Y_12 with all X variables expect_silent( y12_mi_corr_all <- nonprob( - outcome = as.formula(paste('Y_12', X_formula)), + outcome = as.formula(paste('Y_12 ~ ', as.character(X_formula)[2])), data = sample_B1, pop_totals = X_totals, method_outcome = "glm", @@ -121,7 +120,7 @@ expect_equal(y12_mi_corr_all$output$SE, 0.2454465, tolerance = 0.0001) # For Y_21 with all X variables expect_silent( y21_mi_corr_all <- nonprob( - outcome = as.formula(paste('Y_21', X_formula)), + outcome = as.formula(paste('Y_21 ~ ', as.character(X_formula)[2])), data = sample_B1, pop_totals = X_totals, method_outcome = "glm", @@ -138,7 +137,7 @@ expect_equal(y21_mi_corr_all$output$SE, 0.01992489, tolerance = 0.0001) # For Y_22 with all X variables expect_silent( y22_mi_corr_all <- nonprob( - outcome = as.formula(paste('Y_22', X_formula)), + outcome = as.formula(paste('Y_22 ~ ', as.character(X_formula)[2])), data = sample_B1, pop_totals = X_totals, method_outcome = "glm", diff --git a/inst/tinytest/test-4-dr-totals.R b/inst/tinytest/test-4-dr-totals.R index cee8879..f826d6e 100644 --- a/inst/tinytest/test-4-dr-totals.R +++ b/inst/tinytest/test-4-dr-totals.R @@ -97,7 +97,7 @@ expect_true(y22_dr_corr_one$confidence_interval$lower_bound < mean(Y_22) & expect_silent( y11_dr_corr_all <- nonprob( selection = X_formula, - outcome = as.formula(paste('Y_11', X_formula)), + outcome = as.formula(paste('Y_11 ~ ', as.character(X_formula)[2])), data = sample_B1, pop_totals = X_totals, method_selection = "logit", @@ -115,7 +115,7 @@ expect_true(y11_dr_corr_all$confidence_interval$lower_bound < mean(Y_11) & expect_silent( y12_dr_corr_all <- nonprob( selection = X_formula, - outcome = as.formula(paste('Y_12', X_formula)), + outcome = as.formula(paste('Y_12 ~ ', as.character(X_formula)[2])), data = sample_B1, pop_totals = X_totals, method_selection = "logit", @@ -133,7 +133,7 @@ expect_true(y12_dr_corr_all$confidence_interval$lower_bound < mean(Y_12) & expect_silent( y21_dr_corr_all <- nonprob( selection = X_formula, - outcome = as.formula(paste('Y_21', X_formula)), + outcome = as.formula(paste('Y_21 ~ ', as.character(X_formula)[2])), data = sample_B1, pop_totals = X_totals, method_selection = "logit", @@ -152,7 +152,7 @@ expect_equal(y21_dr_corr_all$output$SE, 0.0192478, tolerance = 0.0001) expect_silent( y22_dr_corr_all <- nonprob( selection = X_formula, - outcome = as.formula(paste('Y_22', X_formula)), + outcome = as.formula(paste('Y_22 ~ ', as.character(X_formula)[2])), data = sample_B1, pop_totals = X_totals, method_selection = "logit", diff --git a/inst/tinytest/test_nonprobsvy.R b/inst/tinytest/test_nonprobsvy.R index 1f2b85c..28ab91f 100644 --- a/inst/tinytest/test_nonprobsvy.R +++ b/inst/tinytest/test_nonprobsvy.R @@ -252,7 +252,7 @@ expect_true( # These tests are only supposed to be run on developer's machine and # package GitHub page not on CRAN (they take too long) -# if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { +if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) { expect_silent( test1a_bootstrap <- nonprob(selection = ~ x, @@ -339,4 +339,4 @@ expect_true( expect_equivalent(test3a$output$mean, test3a_bootstrap$output$mean, tolerance = 0.1) expect_equivalent(test3a$output$SE, test3a_bootstrap$output$SE, tolerance = 0.1) -# } +} diff --git a/man/mean.nonprobsvy.Rd b/man/mean.nonprobsvy.Rd deleted file mode 100644 index 4248b7f..0000000 --- a/man/mean.nonprobsvy.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/simple_methods.R -\name{mean.nonprobsvy} -\alias{mean.nonprobsvy} -\title{Mean values of covariates in subgroups} -\usage{ -\method{mean}{nonprobsvy}(x, nonprob) -} -\arguments{ -\item{x}{\itemize{ -\item formula -}} - -\item{nonprob}{\itemize{ -\item object of nonprobsvy class -}} -} -\value{ -A vector of estimated means of the given covariates in subgroups -} -\description{ -Mean values of covariates in subgroups -} diff --git a/man/total.nonprobsvy.Rd b/man/total.nonprobsvy.Rd deleted file mode 100644 index d509a22..0000000 --- a/man/total.nonprobsvy.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/simple_methods.R -\name{total.nonprobsvy} -\alias{total.nonprobsvy} -\title{Total values of covariates in subgroups} -\usage{ -\method{total}{nonprobsvy}(x, nonprob) -} -\arguments{ -\item{x}{\itemize{ -\item formula -}} - -\item{nonprob}{\itemize{ -\item object of nonprobsvy class -}} -} -\value{ -A vector of estimated totals of the given covariates in subgroups -} -\description{ -Total values of covariates in subgroups -} diff --git a/src/Makevars b/src/Makevars index 3d1e654..f09302a 100644 --- a/src/Makevars +++ b/src/Makevars @@ -15,5 +15,5 @@ ## set the appropriate value, possibly CXX17. # CXX_STD = CXX11 -#PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -#PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) +PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) +PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)