Skip to content

Commit

Permalink
Merge pull request #61 from shamindras/iss-60-assertion-getassmps
Browse files Browse the repository at this point in the history
  • Loading branch information
shamindras authored May 2, 2021
2 parents 23d7747 + 5e792ba commit bddb14d
Show file tree
Hide file tree
Showing 104 changed files with 2,817 additions and 294 deletions.
12 changes: 8 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
Package: maars
Title: Tidy Inference Under Misspecified Statistical Models in R
Version: 1.0.0
Version: 1.1.0
Authors@R:
c(person(given = "Shamindra",
family = "Shrotriya",
role = c("aut", "cre"),
role = c("aut", "cre", "cph"),
email = "[email protected]"),
person(given = "Riccardo",
family = "Fogliato",
role = c("aut"),
email = "[email protected]"))
role = c("aut", "cph"),
email = "[email protected]"),
person(given = "Arun Kumar",
family = "Kuchibhotla",
role = c("aut", "cph"),
email = "[email protected]"))
Description: Tidy Inference Under Misspecified Statistical Models in R. An
implementation of the Models As Approximations series of statistics papers.
License: MIT + file LICENSE
Expand Down
36 changes: 36 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,39 @@
# maars 1.1.0

- Change `Significance` heading in our summary output to be abbreviated to `Signif:` to be consistent with `lm()` output
- Change column ordering for bootstrap standard errors and t-statistics rigorously
- Change p-value format to be `2`/`3` digits
- Clean `R/scripts_and_filters/experiments/` dir, remove old experiments
- Clean up script that gets metadata of `maars` function **metadata** from `pkgdown`
- Remove all `base::` prefix use e.g. `base::return()`
- Change `dplyr::summarise` to `dplyr::summarize` for spelling consistency
- Ensure all `stats` functions use the `stats::` prefix
- Fix `.data` related `rlang` issues
- We shouldn't name a variable `df` since this conflicts with `stats::df`. We should change this after the demo
- Clean vignette. Leave the plot in the vignette (to be moved elsewhere)
- Updated the corrected `Boston Housing` Dataset with citations. Add unit tests
for the corrections
- Clean up spelling notes
- Change multiplier weights code to use `switch` based approach
- Switch to `model.matrix` in sandwich variance and use residuals in the computation
- Update `maars` to have a package level doc. Add `@importFrom` statements
- Fix `NOTE` by adding `.gitkeep` in vignettes to `.RBuildignore`
- Fix `NOTE` by DESCRIPTION meta-information by making it a couple of sentences. This is a placeholder and we should refine it before official `CRAN` release.
- Consolidate `boston-housing.R` and `la-county.R` files into a single `data-maars.R` files. Consolidate `test` files accordingly
- Make some minor changes to the vignette
- Add `styling` to our code using the `Makefile` and `styler::style_dir(here::here('R'))`
and for `tests`
- Have a `make style` which does both `R` and `tests`
- Make sure styling does not include vignettes
- Ensure that `url_check()` are all resolved for `CRAN`
- Remove DOI entries from `inst/REFERENCES.bib` since they can cause `CRAN` url issues
- Add search functionality to `pkgdown` our site.
- Use MIT License
- Fix the no visible binding for global variable errors in our code
- Remove mixture of `%>%` and base code, and just break pipes into variables
- Change all `attr(obj, "class") <- c("obj_class_name")` to be of the form `class(obj) <- "obj_class_name"` for consistency
- Change `dplyr::_all` scoped words using the superseded `across` function

# maars 1.0.0

- Set the default `digits = 3` formatting in `summary.maars_lm` printed output
Expand Down
25 changes: 17 additions & 8 deletions R/boot-empirical.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#' be drawn.
#' @param m Number of observations to be sampled with replacement from
#' the original dataset for each bootstrap repetition.
#' @param replace TODO: ADD
#'
#' @return A tibble containing the number of bootstrap iteration, the
#' bootstrap samples (\code{data}),
Expand Down Expand Up @@ -41,14 +42,15 @@
#' }
comp_boot_emp_samples <- function(data,
B = 1,
m = NULL) {
m = NULL,
replace = TRUE) {
n <- nrow(data)
if (is.null(m)) {
m <- n
}
# this is a simplified version of rsample because rsample currently
# does not allow for sampling m!=n observations from the data
indices <- purrr::map(rep(n, B), sample, replace = TRUE, size = m)
indices <- purrr::map(rep(n, B), sample, replace = replace, size = m)

out <- tibble::tibble(
b = as.integer(paste0(1:length(indices))),
Expand Down Expand Up @@ -171,6 +173,7 @@ fit_reg <- function(mod_fit, data, weights = NULL) {
#' @param B Bootstrap repetitions or number of bootstrap samples to be drawn.
#' @param m Number of observations to be sampled with replacement from the
#' dataset for each bootstrap repetition.
#' @param replace TODO: ADD
#'
#' @return A list containing the following elements.
#' \code{var_type}: The type of estimator for the variance of the coefficients
Expand Down Expand Up @@ -207,7 +210,7 @@ fit_reg <- function(mod_fit, data, weights = NULL) {
#'
#' print(out)
#' }
comp_boot_emp <- function(mod_fit, B = 100, m = NULL) {
comp_boot_emp <- function(mod_fit, B = 100, m = NULL, replace = TRUE) {
assertthat::assert_that(all("lm" == class(mod_fit)) | any("glm" == class(mod_fit)),
msg = glue::glue("mod_fit must only be of class lm or glm")
)
Expand All @@ -219,20 +222,26 @@ comp_boot_emp <- function(mod_fit, B = 100, m = NULL) {
m <- n
}

assertthat::assert_that(replace || (!replace & m <= n),
msg = glue::glue("m must be less or equal to n for subsampling")
)

boot_type <- ifelse(replace, 'emp', 'sub')

boot_out <- lapply(
1:B,
function(x) {
fit_reg(
mod_fit = mod_fit,
data = comp_boot_emp_samples(data, B = 1, m)$data[[1]]
data = comp_boot_emp_samples(data, B = 1, m = m, replace = replace)$data[[1]]
)
}
)

cov_mat <- boot_out %>%
purrr::map(~ .x %>% dplyr::pull(estimate)) %>%
dplyr::bind_rows(data = ., .id = NULL) %>%
stats::cov(x = .) * m / n
stats::cov(x = .) * m / n # check in case of subsampling

boot_out <- boot_out %>%
dplyr::bind_rows(.id = "b") %>%
Expand All @@ -243,12 +252,12 @@ comp_boot_emp <- function(mod_fit, B = 100, m = NULL) {
summary_boot <- get_boot_summary(
mod_fit = mod_fit,
boot_out = boot_out,
boot_type = "emp"
boot_type = boot_type # check if subsampling needs any modification
)

out <- list(
var_type = "boot_emp",
var_type_abb = "emp",
var_type = glue::glue("boot_{boot_type}"),
var_type_abb = boot_type,
var_summary = summary_boot,
var_assumptions = c(
glue::glue("Observations are assumed to be independent",
Expand Down
27 changes: 21 additions & 6 deletions R/maars-lm.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#' in \code{list(B = -15, m = -20)} will pass this function without errors,
#' these will be addressed explicitly in \code{\link{comp_boot_emp}} as
#' invalid inputs.
#' @param boot_sub (list) TODO: ADD
#' @param boot_mul (list) : In the case of multiplier bootstrap the expected
#' input is of the form \code{list(B = 10, weights_type = "rademacher")}.
#' Here the named element \code{weights_type} is optional
Expand Down Expand Up @@ -76,14 +77,19 @@
#' }
comp_var <- function(mod_fit,
boot_emp = NULL,
boot_sub = NULL,
boot_res = NULL,
boot_mul = NULL) {
# the following condition will need to be revised once we introduce glm
if (all(c("maars_lm", "lm") %in% class(mod_fit))) {
attr(mod_fit, "class") <- "lm"
}

out_var <- comp_mms_var(mod_fit, boot_emp, boot_res, boot_mul)
out_var <- comp_mms_var(mod_fit = mod_fit,
boot_emp = boot_emp,
boot_sub = boot_sub,
boot_res = boot_res,
boot_mul = boot_mul)
mod_fit[["var"]] <- out_var

class(mod_fit) <- c("maars_lm", "lm")
Expand Down Expand Up @@ -165,6 +171,8 @@ get_mms_summary_split_cli <- function(title,
#' \code{FALSE} to exclude this output from the request.
#' @param boot_emp (logical) : \code{TRUE} if empirical bootstrap standard error
#' output is required, \code{FALSE} to exclude this output from the request.
#' @param boot_sub (logical) : \code{TRUE} if subsampling standard error
#' output is required, \code{FALSE} to exclude this output from the request.
#' @param boot_res (logical) : \code{TRUE} if residual bootstrap standard error
#' output is required, \code{FALSE} to exclude this output from the request.
#' @param boot_mul (logical) : \code{TRUE} if multiplier bootstrap standard error
Expand All @@ -178,11 +186,12 @@ get_mms_summary_split_cli <- function(title,
#' @method summary maars_lm
#' @export
summary.maars_lm <- function(object,
sand = FALSE,
boot_emp = FALSE,
boot_res = FALSE,
boot_mul = FALSE,
well_specified = FALSE,
sand = NULL,
boot_emp = NULL,
boot_sub = NULL,
boot_mul = NULL,
boot_res = NULL,
well_specified = NULL,
digits = 3,
...) {

Expand All @@ -193,6 +202,7 @@ summary.maars_lm <- function(object,
mod_fit = object,
sand = sand,
boot_emp = boot_emp,
boot_sub = boot_sub,
boot_res = boot_res,
boot_mul = boot_mul,
well_specified = well_specified
Expand Down Expand Up @@ -598,6 +608,8 @@ get_mms_summary_confint_split_cli <- function(title,
#' \code{FALSE} to exclude this output from the request.
#' @param boot_emp (logical) : \code{TRUE} if empirical bootstrap standard error
#' output is required, \code{FALSE} to exclude this output from the request.
#' @param boot_sub (logical) : \code{TRUE} if subsampling standard error
#' output is required, \code{FALSE} to exclude this output from the request.
#' @param boot_res (logical) : \code{TRUE} if residual bootstrap standard error
#' output is required, \code{FALSE} to exclude this output from the request.
#' @param boot_mul (logical) : \code{TRUE} if multiplier bootstrap standard error
Expand All @@ -614,8 +626,10 @@ get_mms_summary_confint_split_cli <- function(title,
confint.maars_lm <- function(object,
parm = NULL,
level = 0.95,
# TODO: to be changed
sand = TRUE,
boot_emp = FALSE,
boot_sub = FALSE,
boot_res = FALSE,
boot_mul = FALSE,
well_specified = FALSE,
Expand All @@ -640,6 +654,7 @@ confint.maars_lm <- function(object,
mod_fit = object,
sand = sand,
boot_emp = boot_emp,
boot_sub = boot_sub,
boot_res = boot_res,
boot_mul = boot_mul,
well_specified = well_specified
Expand Down
14 changes: 9 additions & 5 deletions R/ols-confint.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#' \code{FALSE} to exclude this output from the request
#' @param boot_emp (logical) : \code{TRUE} if empirical bootstrap standard error
#' output is required, \code{FALSE} to exclude this output from the request
#' @param boot_sub (logical) : \code{TRUE} if subsampling standard error
#' output is required, \code{FALSE} to exclude this output from the request
#' @param boot_res (logical) : \code{TRUE} if residual bootstrap standard error
#' output is required, \code{FALSE} to exclude this output from the request
#' @param boot_mul (logical) : \code{TRUE} if multiplier bootstrap standard error
Expand All @@ -40,11 +42,12 @@
get_confint <- function(mod_fit,
parm = NULL,
level = 0.95,
sand = TRUE,
boot_emp = FALSE,
boot_mul = FALSE,
boot_res = FALSE,
well_specified = FALSE) {
sand = NULL,
boot_emp = NULL,
boot_sub = NULL,
boot_mul = NULL,
boot_res = NULL,
well_specified = NULL) {

# Check parm is NULL valued
# TODO: Allow this to be a vector of numbers or a vector of names to filter
Expand All @@ -65,6 +68,7 @@ get_confint <- function(mod_fit,
mod_fit = mod_fit,
sand = sand,
boot_emp = boot_emp,
boot_sub = boot_sub,
boot_res = boot_res,
boot_mul = boot_mul,
well_specified = well_specified
Expand Down
2 changes: 1 addition & 1 deletion R/ols-sandwich-lm-var.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ comp_lm_var <- function(mod_fit) {
var_type_abb = "lm",
var_summary = summary_lm,
var_assumptions = assumptions_lm,
cov_mat = NULL
cov_mat = vcov(mod_fit)
)

return(out)
Expand Down
Loading

0 comments on commit bddb14d

Please sign in to comment.