README.Rmd

---
output: github_document
editor_options: 
  markdown: 
    wrap: 72
---

<!-- README.md is generated from README.Rmd. Please edit that file -->

```{r, include = FALSE}
# Default chunk options for every code chunk knitted in this document.
knitr::opts_chunk$set(
  # Show source and output together in one block.
  collapse = TRUE,
  # Prefix printed output lines with "#>".
  comment = "#>",
  # Write generated figures under man/figures/ with a "README-" prefix.
  fig.path = "man/figures/README-",
  out.width = "100%"
)
```

# analysistools

<!-- badges: start -->

[![Contributor
Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](code_of_conduct.md)
[![check-standard](https://github.com/impact-initiatives/analysistools/actions/workflows/check-standard.yaml/badge.svg)](https://github.com/impact-initiatives/analysistools/actions/workflows/check-standard.yaml)
[![Codecov test
coverage](https://codecov.io/gh/impact-initiatives/analysistools/branch/main/graph/badge.svg)](https://app.codecov.io/gh/impact-initiatives/analysistools?branch=main)

<!-- badges: end -->

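The goal of analysistools is to provide functions to analyse survey
data: adding weights, computing descriptive statistics (means, medians,
proportions and ratios) and reviewing analysis results.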

## Installation

You can install the development version of analysistools from
[GitHub](https://github.com/) with:

``` r
# Uncomment the next line if devtools is not installed yet.
# install.packages("devtools")
devtools::install_github("impact-initiatives/analysistools")
```

## Example

```{r load}
library(analysistools)
```

### How to add weights

```{r}
# Keep a few columns from the template dataset, plus every column whose name
# matches "edu_learning_conditions_reasons_v1".
shorter_df <- analysistools_MSNA_template_data[, c(
  "admin1",
  "admin2",
  "expenditure_debt",
  "income_v1_salaried_work",
  "wash_drinkingwatersource",
  grep("edu_learning_conditions_reasons_v1",
    names(analysistools_MSNA_template_data),
    value = TRUE
  )
)]

# Sampling frame: one row per stratum with its population figure.
example_sample <- data.frame(
  strata = c("admin1a", "admin1b", "admin1c"),
  population = c(30000, 50000, 80000)
)

# "admin1" in the dataset is matched to "strata" in the sampling frame;
# the result carries a "weights" column (displayed below).
weighted_shorter_df <- shorter_df %>%
  add_weights(example_sample,
    strata_column_dataset = "admin1",
    strata_column_sample = "strata",
    population_column = "population"
  )

weighted_shorter_df[, c("admin1", "weights")] %>% head()
```

### How to perform a descriptive analysis (mean, median, proportions)

The *create_analysis* function needs a survey design from *srvyr*.

```{r}
# Survey design built from the weighted data, using admin1 as strata and the
# weights column as weights.
example_design <- srvyr::as_survey(
  weighted_shorter_df,
  strata = admin1,
  weights = weights
)
```

If only the design is provided, it will perform mean, median and
proportions.

```{r}
# Only the design (and the select-multiple separator) is supplied here.
ex1_results <- create_analysis(
  design = example_design,
  sm_separator = "/"
)
```

It should return an object with 3 elements:

-   the results table (in a long format and with an analysis key),
-   the dataset used,
-   the list of analysis performed.

```{r}
# Show the names of the elements of the returned object.
names(ex1_results)
```

```{r}
# First rows of the results table.
head(ex1_results[["results_table"]])
```

```{r}
# First rows of the list of analysis.
head(ex1_results[["loa"]])
```

#### Grouping variables

The group_var can be used to define the different grouping (independent)
variables. For example:

-   one variable

```{r}
# One grouping variable.
ex2_results <- create_analysis(
  design = srvyr::as_survey(shorter_df),
  group_var = "admin1",
  sm_separator = "/"
)
ex2_results[["loa"]]
```

-   two variables separately

```{r}
# Two grouping variables, passed as separate elements of a character vector.
ex3_results <- create_analysis(
  design = srvyr::as_survey(shorter_df),
  group_var = c("admin1", "admin2"),
  sm_separator = "/"
)
ex3_results[["loa"]]
```

-   two variables combined

```{r}
# Two grouping variables, passed as a single comma-separated string.
ex4_results <- create_analysis(
  design = srvyr::as_survey(shorter_df),
  group_var = "admin1, admin2",
  sm_separator = "/"
)
ex4_results[["loa"]]
```

### How to perform a descriptive analysis with a *list of analysis*

```{r}
# Analysis driven by the template list of analysis shipped with the package.
ex5_results <- create_analysis(
  design = srvyr::as_survey(shorter_df),
  loa = analysistools_MSNA_template_loa,
  sm_separator = "/"
)
ex5_results[["loa"]]
```

### How to perform specific analysis

#### Mean

This is a basic example which shows you how to calculate the mean:

```{r mean}
# Toy dataset: ten numeric values, a two-level grouping column, weights and
# a strata label.
somedata <- data.frame(
  aa = 1:10,
  bb = rep(c("a", "b"), times = 5),
  weights = rep(c(.5, 1.5), times = 5),
  stratas = rep(c("strata_a", "strata_b"), times = 5)
)

# Design without weights: overall mean, then mean by group.
me_design <- srvyr::as_survey(somedata)
create_analysis_mean(me_design, analysis_var = "aa")
create_analysis_mean(me_design, group_var = "bb", analysis_var = "aa")

# Design with weights: same two calls.
me_design_w <- srvyr::as_survey(somedata, weights = weights)
create_analysis_mean(me_design_w, analysis_var = "aa")
create_analysis_mean(me_design_w, group_var = "bb", analysis_var = "aa")
```

#### Median

This is a basic example which shows you how to calculate the median:

```{r median}
# Toy dataset: ten numeric values, a two-level grouping column, weights and
# a strata label.
somedata <- data.frame(
  aa = 1:10,
  bb = rep(c("a", "b"), times = 5),
  weights = rep(c(.5, 1.5), times = 5),
  stratas = rep(c("strata_a", "strata_b"), times = 5)
)

# Design without weights: overall median, then median by group.
me_design <- srvyr::as_survey(somedata)
create_analysis_median(me_design, analysis_var = "aa")
create_analysis_median(me_design, group_var = "bb", analysis_var = "aa")

# Design with weights: same two calls.
me_design_w <- srvyr::as_survey(somedata, weights = weights)
create_analysis_median(me_design_w, analysis_var = "aa")
create_analysis_median(me_design_w, group_var = "bb", analysis_var = "aa")
```

#### Proportion

##### Select one

This is a basic example which shows you how to calculate the proportion
for select one:

```{r proportion}
# Fix the seed so the simulated data (and therefore the knitted README)
# are the same on every build.
set.seed(8988)
somedata <- data.frame(
  groups = sample(c("group_a", "group_b"),
    size = 100,
    replace = TRUE
  ),
  # `prob` holds sampling weights; sample() does not require them to sum to 1.
  value = sample(c("a", "b", "c"),
    size = 100, replace = TRUE,
    prob = c(.6, .4, .1)
  )
)

# Build the stratified design once and reuse it for both calls.
prop_design <- srvyr::as_survey(somedata, strata = groups)

# Overall proportions.
create_analysis_prop_select_one(prop_design,
  group_var = NA,
  analysis_var = "value",
  level = .95
)
# Proportions by group.
create_analysis_prop_select_one(prop_design,
  group_var = "groups",
  analysis_var = "value",
  level = .95
)
```

##### Select multiple

```{r proportion select multiple}
# Fix the seed so the simulated data (and therefore the knitted README)
# are the same on every build.
set.seed(8988)
somedata <- data.frame(
  groups = sample(c("group_a", "group_b"), size = 100, replace = TRUE),
  # Parent column starts empty; it is rebuilt from the option columns below.
  smvar = rep(NA_character_, 100),
  smvar.option1 = sample(c(TRUE, FALSE),
    size = 100, replace = TRUE, prob = c(.7, .3)
  ),
  smvar.option2 = sample(c(TRUE, FALSE),
    size = 100, replace = TRUE, prob = c(.6, .4)
  ),
  smvar.option3 = sample(c(TRUE, FALSE),
    size = 100, replace = TRUE, prob = c(.1, .9)
  ),
  smvar.option4 = sample(c(TRUE, FALSE),
    size = 100, replace = TRUE, prob = c(.8, .2)
  ),
  uuid = 1:100 %>% as.character()
) %>%
  cleaningtools::recreate_parent_column(uuid = "uuid", sm_separator = ".")

# The dataset is read from the data_with_fix_concat element of the result.
somedata <- somedata$data_with_fix_concat

# Overall proportions.
create_analysis_prop_select_multiple(srvyr::as_survey(somedata),
  group_var = NA,
  analysis_var = "smvar",
  level = 0.95
)

# Proportions by group.
create_analysis_prop_select_multiple(srvyr::as_survey(somedata),
  group_var = "groups",
  analysis_var = "smvar",
  level = 0.95
)
```

#### Ratios

This is a basic example which shows you how to calculate the ratio
between 2 numeric variables:

```{r}
# Four households with counts of children, enrolled and attending children;
# missing values are deliberate.
school_ex <- data.frame(
  hh = paste0("hh", 1:4),
  num_children = c(3, 0, 2, NA),
  num_enrolled = c(3, NA, 0, NA),
  num_attending = c(1, NA, NA, NA),
  group = rep(c("a", "b"), each = 2)
)

me_design <- srvyr::as_survey(school_ex)
```

With the default values, the ratio is 0.2, as 1 child out of 5 attends
school. For hh3, the NA is present because of a skip logic: no child can
be attending as none are enrolled. The number of households counted, n,
is equal to 2, as only 2 households have children.

```{r}
# Ratio of attending children to children, all other arguments left at
# their defaults.
create_analysis_ratio(
  me_design,
  analysis_var_numerator = "num_attending",
  analysis_var_denominator = "num_children"
)
```

If numerator_NA_to_0 is set to FALSE, the ratio will be 1/3, as hh3
(2 children and NA for attending) will be removed by the na.rm = T
inside the survey_ratio calculation. The number of households used in
the calculation is 1.

```{r}
# Same ratio, with numerator_NA_to_0 switched off.
create_analysis_ratio(
  me_design,
  analysis_var_numerator = "num_attending",
  analysis_var_denominator = "num_children",
  numerator_NA_to_0 = FALSE
)
```

If filter_denominator_0 is set to FALSE, the ratio will be 0.2, as 1
child out of 5 attends school. For hh3, the NA is present because of a
skip logic: no child can be attending as none are enrolled. The number
of households counted, n, is equal to 3 instead of 2: the household
with 0 children is counted in n.

```{r}
# Same ratio, with filter_denominator_0 switched off. This example is about
# filter_denominator_0, so numerator_NA_to_0 stays at its default.
create_analysis_ratio(me_design,
  analysis_var_numerator = "num_attending",
  analysis_var_denominator = "num_children",
  filter_denominator_0 = FALSE
)
```

For weights and group:

```{r}
# Seeded simulation: number of children aged 5-18 and number enrolled, by
# group. Enrolled children are capped at the number of children.
set.seed(8988)
somedata <- data.frame(
  groups = rep(c("a", "b"), 50),
  children_518 = sample(0:5, 100, replace = TRUE),
  children_enrolled = sample(0:5, 100, replace = TRUE)
) %>%
  dplyr::mutate(children_enrolled = pmin(children_enrolled, children_518))

# Group "a" gets weight 1.33, every other row .67.
somedata[["weights"]] <- ifelse(somedata$groups == "a", 1.33, .67)

# Weighted, stratified design shared by the two calls below.
ratio_design <- srvyr::as_survey(
  somedata,
  weights = weights,
  strata = groups
)

# Overall ratio.
create_analysis_ratio(
  ratio_design,
  group_var = NA,
  analysis_var_numerator = "children_enrolled",
  analysis_var_denominator = "children_518",
  level = 0.95
)
# Ratio by group.
create_analysis_ratio(
  ratio_design,
  group_var = "groups",
  analysis_var_numerator = "children_enrolled",
  analysis_var_denominator = "children_518",
  level = 0.95
)
```

### How to review results

The logic behind reviewing an analysis is to compare the results from 2
independent analyses of the same variables using *review_analysis*.

In this example, the results table to be reviewed and the dataset are
loaded.

```{r}
# Results table to be reviewed, read from the results_table element of the
# example object shipped with the package.
results_to_review <- analysistools::analysistools_MSNA_template_with_ratio_results_table$results_table

# Dataset used to re-run the analysis.
dataset_to_analyse <- analysistools::analysistools_MSNA_template_data
```

The list of analysis from the results can be reproduced with
*create_loa_from_results* and the analysis key. This *loa* can be used
to create a new analysis to be compared with.

```{r}
# Rebuild the list of analysis from the results under review.
me_loa <- create_loa_from_results(results_to_review)

# Re-run the analysis on the dataset with that list of analysis.
me_analysis <- create_analysis(
  srvyr::as_survey(dataset_to_analyse),
  loa = me_loa,
  sm_separator = "/"
)
```

The new results and the results to be reviewed are bound together by the
*analysis_key*.

```{r}
# Full join on analysis_key: columns shared by both tables get .x (reviewed)
# and .y (new analysis) suffixes.
binded_results <- dplyr::full_join(
  results_to_review,
  me_analysis$results_table,
  by = "analysis_key"
)
```

```{r}
# Compare the reviewed statistics (.x) with the recomputed ones (.y).
review_results <- review_analysis(
  binded_results,
  stat_columns_to_review = c("stat.x", "stat_low.x", "stat_upp.x"),
  stat_columns_to_compare_with = c("stat.y", "stat_low.y", "stat_upp.y")
)

# Mean of review_check for each stat.
review_results$review_table %>%
  dplyr::group_by(stat) %>%
  dplyr::summarise(prop_correct = mean(review_check))

# Count of each review comment per stat, most frequent first.
review_results$review_table %>%
  dplyr::group_by(stat, review_comment) %>%
  dplyr::tally(sort = TRUE)

# Distinct analyses where review_check is not TRUE.
review_results$review_table %>%
  dplyr::filter(!review_check) %>%
  dplyr::select(analysis_type, analysis_var, group_var) %>%
  dplyr::distinct()
```


```{r}
# Hand-written analysis keys, one per test row below.
analysis_key_column <- c(
  "mean @/@ income ~/~ NA @/@ NA ~/~ NA",
  "prop_select_one @/@ water_source ~/~ tap_water @/@ district ~/~ district_a",
  "prop_select_one @/@ water_source ~/~ tap_water @/@ district ~/~ district_a ~/~ population ~/~ displaced",
  "prop_select_multiple @/@ source_information ~/~ relatives @/@ NA ~/~ NA"
)

# Four cases: equal values, different values, missing in y, missing in x.
test_analysis_results <- data.frame(
  test = c(
    "test equality",
    "test difference",
    "test Missing in y",
    "test Missing in x"
  ),
  stat_col.x = c(0, 1, 2, NA),
  stat_col.y = c(0, 2, NA, 3),
  analysis_key = analysis_key_column
)

review_results2 <- review_analysis(
  test_analysis_results,
  stat_columns_to_review = "stat_col.x",
  stat_columns_to_compare_with = "stat_col.y"
)

# Mean of review_check for each stat.
review_results2$review_table %>%
  dplyr::group_by(stat) %>%
  dplyr::summarise(prop_correct = mean(review_check))

# Count of each review comment per stat, most frequent first.
review_results2$review_table %>%
  dplyr::group_by(stat, review_comment) %>%
  dplyr::tally(sort = TRUE)

# Distinct analyses where review_check is not TRUE.
review_results2$review_table %>%
  dplyr::filter(!review_check) %>%
  dplyr::select(review_check, analysis_type, analysis_var, group_var) %>%
  dplyr::distinct()
```
### Converting the analysis index into a table

This is how to turn the analysis index into a table:

```{r create_analysis_key_table}
# Three analysis keys: one ungrouped mean and the same mean for groups A
# and B of "gro".
index_values <- c(
  "mean @/@ v1 ~/~ NA @/@ NA ~/~ NA",
  "mean @/@ v1 ~/~ NA @/@ gro ~/~ A",
  "mean @/@ v1 ~/~ NA @/@ gro ~/~ B"
)
resultstable <- data.frame(analysis_index = index_values)

# The second argument names the column that holds the analysis key.
key_table <- create_analysis_key_table(resultstable, "analysis_index")
key_table
```

You can then unite the analysis and grouping variables if needed.

```{r}
# Applied to the key table created in the previous chunk.
unite_variables(key_table)
```

## Code of Conduct

Please note that the analysistools project is released with a
[Contributor Code of
Conduct](https://impact-initiatives.github.io/analysistools/CODE_OF_CONDUCT.html).
By contributing to this project, you agree to abide by its terms.