Skip to content

Commit

Permalink
add more unit tests small fixes regarding estimation with pop totals
Browse files Browse the repository at this point in the history
  • Loading branch information
LukaszChrostowski committed Oct 4, 2024
1 parent 3a13951 commit c3a4d53
Show file tree
Hide file tree
Showing 13 changed files with 981 additions and 142 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ Imports:
nleqslv,
doParallel,
foreach,
parallel
parallel,
formula.tools
LinkingTo:
Rcpp,
RcppArmadillo
Expand Down
5 changes: 3 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,16 @@ importFrom(Rcpp,sourceCpp)
importFrom(doParallel,registerDoParallel)
importFrom(foreach,"%dopar%")
importFrom(foreach,foreach)
importFrom(formula.tools,lhs)
importFrom(formula.tools,rhs)
importFrom(formula.tools,lhs.vars)
importFrom(formula.tools,rhs.vars)
importFrom(maxLik,maxLik)
importFrom(ncvreg,cv.ncvreg)
importFrom(nleqslv,nleqslv)
importFrom(parallel,makeCluster)
importFrom(parallel,stopCluster)
importFrom(stats,AIC)
importFrom(stats,BIC)
importFrom(stats,aggregate)
importFrom(stats,as.formula)
importFrom(stats,binomial)
importFrom(stats,coef)
Expand Down
31 changes: 20 additions & 11 deletions R/internals.R
Original file line number Diff line number Diff line change
Expand Up @@ -151,20 +151,29 @@ get_method <- function(method) {
}

ff <- function(formula) {
fff <- as.character(formula)
f <- strsplit(fff[2], "\\s*\\+\\s*")[[1]]
outcome_formulas <- list()
if (any(duplicated(f))) {
warning("No unique names of the outcome variables in formula. The error has been corrected")
f <- unique(f)
}
l <- length(f)
for (i in 1:l) {
outcome_formulas[[i]] <- as.formula(paste(f[i], fff[3], sep = " ~ "))
formula_string <- paste(deparse(formula), collapse = " ")
formula_parts <- strsplit(formula_string, "~")[[1]]
if (length(formula_parts) != 2) {
stop("The formula must contain exactly one '~' operator.")
}

lhs <- trimws(formula_parts[1])
rhs <- trimws(formula_parts[2])

dependent_vars <- strsplit(lhs, "\\s*\\+\\s*")[[1]]
independent_vars <- strsplit(rhs, "\\s*\\+\\s*")[[1]]

if (any(duplicated(dependent_vars))) {
warning("Duplicate dependent variable names detected. They have been made unique.")
dependent_vars <- unique(dependent_vars)
}
outcome_formulas <- vector("list", length(dependent_vars))
l <- length(dependent_vars)
for (i in seq_along(dependent_vars)) {
outcome_formulas[[i]] <- reformulate(termlabels = independent_vars, response = dependent_vars[i])
}
list(
f = f,
f = dependent_vars,
outcomes = outcome_formulas,
l = l
)
Expand Down
9 changes: 7 additions & 2 deletions R/nonprobDR.R
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,9 @@ nonprobDR <- function(selection,
X_rand <- OutcomeModel$X_rand
nons_names <- OutcomeModel$nons_names
y_nons <- OutcomeModel$y_nons
n_nons <- nrow(X_nons)
R <- rep(1, n_nons)
loc_nons <- which(R == 1)

if (var_selection == TRUE) {
nlambda <- control_outcome$nlambda
Expand Down Expand Up @@ -559,17 +562,19 @@ nonprobDR <- function(selection,
colnames(X) <- c("(Intercept)", colnames(Xsel))
OutcomeModel$X_nons <- SelectionModel$X_nons <- X[loc_nons, ]
SelectionModel$pop_totals <- c(SelectionModel$pop_totals[1], SelectionModel$pop_totals[idx + 1])
pop_totals <- c(pop_totals[1], pop_totals[idx+1])
} else if (control_inference$bias_inf == "div") {
X_outcome <- as.matrix(X[, beta_selected[-1] + 1, drop = FALSE])
Xsel <- X_selection <- as.matrix(X[, theta_selected[-1] + 1, drop = FALSE])
OutcomeModel$X_nons <- cbind(1, X_outcome[loc_nons, ])
OutcomeModel$X_rand <- cbind(1, X_outcome[loc_rand, ])
colnames(OutcomeModel$X_nons) <- colnames(OutcomeModel$X_rand) <- c("(Intercept)", colnames(X_outcome))
colnames(OutcomeModel$X_nons) <- c("(Intercept)", colnames(X_outcome))
SelectionModel$pop_totals <- c(SelectionModel$pop_totals[1], SelectionModel$pop_totals[theta_selected[-1] + 1])
SelectionModel$X_nons <- cbind(1, X_selection[loc_nons, ])
X <- cbind(1, Xsel)
colnames(X) <- colnames(SelectionModel$X_nons) <- c("(Intercept)", colnames(Xsel))
pop_totals <- c(pop_totals[1], pop_totals[theta_selected[-1] + 1])
}
SelectionModel$total_names <- names(SelectionModel$pop_totals)
}
prob_pop_totals <- SelectionModel$pop_totals

Expand Down
3 changes: 2 additions & 1 deletion R/nonprobMI.R
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ nonprobMI <- function(outcome,
n_rand <- 0
weights_rand <- NULL
n_nons <- nrow(X_nons)
R <- rep(1, n_nons)

############## WORKING VERSION
if (var_selection == TRUE) {
Expand Down Expand Up @@ -316,7 +317,7 @@ nonprobMI <- function(outcome,
control = control_outcome,
n_nons = n_nons,
n_rand = n_rand,
model_frame = OutcomeModel$model_frame_rand,
model_frame = Model$model_frame_rand,
vars_selection = control_inference$vars_selection,
pop_totals = pop_totals
)
Expand Down
127 changes: 67 additions & 60 deletions R/simple_methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -328,10 +328,10 @@ deviance.nonprobsvy <- function(object,
#'
#' @method total nonprobsvy
#' @return A vector of estimated totals of the given covariates in subgroups
#' @importFrom formula.tools lhs
#' @importFrom formula.tools rhs
#' @importFrom formula.tools lhs.vars
#' @importFrom formula.tools rhs.vars
#' @importFrom stats aggregate
total.nonprobsvy <- function(x, nonprob) {
# Rozbijanie formuły
variables <- lhs.vars(x)
groups <- rhs.vars(x)

Expand All @@ -353,8 +353,9 @@ total.nonprobsvy <- function(x, nonprob) {
#'
#' @method mean nonprobsvy
#' @return A vector of estimated means of the given covariates in subgroups
#' @importFrom formula.tools lhs
#' @importFrom formula.tools rhs
#' @importFrom formula.tools lhs.vars
#' @importFrom formula.tools rhs.vars
#' @importFrom stats aggregate
mean.nonprobsvy <- function(x, nonprob) {
variables <- lhs.vars(x)
groups <- rhs.vars(x)
Expand All @@ -372,58 +373,64 @@ mean.nonprobsvy <- function(x, nonprob) {
}
mean_value
}
#' @title Median values of covariates in subgroups
#'
#' @param x - formula
#' @param nonprob - object of nonprobsvy class
#'
#' @method median nonprobsvy
#' @return A vector of estimated medians of the given covariates in subgroups
#' @importFrom formula.tools lhs
#' @importFrom formula.tools rhs
median.nonprobsvy <- function(x, nonprob) {
variables <- lhs.vars(x)
groups <- rhs.vars(x)

if (is.null(variables)) {
data <- nonprob$data[which(nonprob$R == 1), groups, drop = FALSE]
median_value <- tapply(nonprob$weights[which(nonprob$R == 1)], data, median)
} else {

data <- nonprob$data[which(nonprob$R == 1), c(variables, groups)]
weights <- nonprob$weights[which(nonprob$R == 1)]

median_value <- aggregate(data[, variables], by = data[, groups, drop = FALSE],
FUN = function(y, w) weighted.median(y, w = w[1:length(y)]),
w = weights)
}
median_value
}
#' @title Quantile values of covariates in subgroups
#'
#' @param x - formula
#' @param nonprob - object of nonprobsvy class
#' @param probs - probability
#'
#' @method quantile nonprobsvy
#' @return A vector of estimated quantiles if the given covariates in subgroups
#' @importFrom formula.tools lhs
#' @importFrom formula.tools rhs
quantile.nonprobsvy <- function(x, nonprob, probs = 0.5) {
variables <- lhs.vars(x)
groups <- rhs.vars(x)

if (is.null(variables)) {
data <- nonprob$data[which(nonprob$R == 1), groups, drop = FALSE]
percentile_value <- tapply(nonprob$weights[which(nonprob$R == 1)], data, function(y) quantile(y, probs = probs))
} else {

data <- nonprob$data[which(nonprob$R == 1), c(variables, groups)]
weights <- nonprob$weights[which(nonprob$R == 1)]

percentile_value <- aggregate(data[, variables], by = data[, groups, drop = FALSE],
FUN = function(y, w) weighted.quantile(y, w = w[1:length(y)], probs = probs),
w = weights)
}
percentile_value
}
# @title Median values of covariates in subgroups
#
# @param x - formula
# @param nonprob - object of nonprobsvy class
#
# @method median nonprobsvy
# @return A vector of estimated medians of the given covariates in subgroups
# @importFrom formula.tools lhs.vars
# @importFrom formula.tools rhs.vars
# @importFrom spatstat weighted.median
# @importFrom stats aggregate
# @importFrom stats median
# median.nonprobsvy <- function(x, nonprob) {
# variables <- lhs.vars(x)
# groups <- rhs.vars(x)
#
# if (is.null(variables)) {
# data <- nonprob$data[which(nonprob$R == 1), groups, drop = FALSE]
# median_value <- tapply(nonprob$weights[which(nonprob$R == 1)], data, median)
# } else {
#
# data <- nonprob$data[which(nonprob$R == 1), c(variables, groups)]
# weights <- nonprob$weights[which(nonprob$R == 1)]
#
# median_value <- aggregate(data[, variables], by = data[, groups, drop = FALSE],
# FUN = function(y, w) weighted.median(y, w = w[1:length(y)]),
# w = weights)
# }
# median_value
# }
# @title Quantile values of covariates in subgroups
#
# @param x - formula
# @param nonprob - object of nonprobsvy class
# @param probs - probability
#
# @method quantile nonprobsvy
# @return A vector of estimated quantiles if the given covariates in subgroups
# @importFrom formula.tools lhs.vars
# @importFrom formula.tools rhs.vars
# @importFrom spatstat weighted.quantile
# @importFrom stats aggregate
# @importFrom stats quantile
# quantile.nonprobsvy <- function(x, nonprob, probs = 0.5) {
# variables <- lhs.vars(x)
# groups <- rhs.vars(x)
#
# if (is.null(variables)) {
# data <- nonprob$data[which(nonprob$R == 1), groups, drop = FALSE]
# percentile_value <- tapply(nonprob$weights[which(nonprob$R == 1)], data, function(y) quantile(y, probs = probs))
# } else {
#
# data <- nonprob$data[which(nonprob$R == 1), c(variables, groups)]
# weights <- nonprob$weights[which(nonprob$R == 1)]
#
# percentile_value <- aggregate(data[, variables], by = data[, groups, drop = FALSE],
# FUN = function(y, w) weighted.quantile(y, w = w[1:length(y)], probs = probs),
# w = weights)
# }
# percentile_value
# }
16 changes: 8 additions & 8 deletions inst/tinytest/test-2-ipw-totals.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) {
# if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) {

library(sampling)
library(survey)
Expand Down Expand Up @@ -505,7 +505,7 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")
#### variable selection ------------------------------------------------------------------
##### one target variable ----------------------------------------------------------------

if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) {
# if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) {

## y_11
expect_silent(
Expand Down Expand Up @@ -625,7 +625,7 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")
# verbose = T)
# )

}
# }


# check probit ----------------------------------------------------------------------------
Expand Down Expand Up @@ -811,7 +811,7 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")
row.names = c("Y_11", "Y_12", "Y_21", "Y_22")))


if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) {
# if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) {
#### variable selection ------------------------------------------------------------------
##### one target variable ----------------------------------------------------------------

Expand Down Expand Up @@ -933,7 +933,7 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")
# verbose = T)
# )

}
# }

## non-linear case ------------------------------------------------------------------------
#### correctly specified variables --------------------------------------------------------
Expand Down Expand Up @@ -1119,7 +1119,7 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")
row.names = c("Y_11", "Y_12", "Y_21", "Y_22")))


if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) {
# if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")) {
#### variable selection ------------------------------------------------------------------
##### one target variable ----------------------------------------------------------------

Expand Down Expand Up @@ -1241,7 +1241,7 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")
# verbose = T)
# )

}
# }

# check cloglog -----------------------------------------------------
## linear case ----------------------------------------------------------------------------
Expand Down Expand Up @@ -1752,4 +1752,4 @@ if (isTRUE(tolower(Sys.getenv("TEST_NONPROBSVY_MULTICORE_DEVELOPER")) == "true")
# # expect_true(y22_corr_scad$confidence_interval$lower_bound < mean(Y_22) &
# # y22_corr_scad$confidence_interval$upper_bound > mean(Y_22)) ## conf int
# expect_true(NROW(y22_corr_scad$selection$coefficients) == 2)
}
# }
Loading

0 comments on commit c3a4d53

Please sign in to comment.