diff --git a/analysis/ccw.R b/analysis/ccw.R index 672d0ce..f62497f 100644 --- a/analysis/ccw.R +++ b/analysis/ccw.R @@ -35,6 +35,7 @@ source(here("analysis", "models_ccw", "cox_cens.R")) source(here("analysis", "models_ccw", "plr_cens.R")) source(here("lib", "functions", "make_filename.R")) source(here("lib", "functions", "dir_structure.R")) +source(here("lib", "design", "covars_table.R")) ################################################################################ # 0.1 Import command-line arguments @@ -46,7 +47,7 @@ if(length(args)==0){ period <- "ba1" contrast <- "all" outcome <- "primary" - model <- "cox" + model <- "plr" subgrp <- "full" supp <- "main" } else { @@ -122,12 +123,6 @@ if (supp %in% c("grace4", "grace3")){ } data <- read_rds(here::here("output", "data", data_filename)) -# change data if run using dummy data -if(Sys.getenv("OPENSAFELY_BACKEND") %in% c("", "expectations")){ - data <- - data %>% - mutate(study_week = runif(nrow(data), 1, 14) %>% floor()) -} ################################################################################ # 0.4 Prepare data @@ -153,9 +148,27 @@ if(Sys.getenv("OPENSAFELY_BACKEND") %in% c("", "expectations")){ # 6. if 'subgrp' is not equal to 'full', the data is subsetted to only include # individuals in a particular subgroup (eg haem malignancies) data <- ccw_simplify_data(data, outcome, contrast, subgrp) +# change data if run using dummy data +if(Sys.getenv("OPENSAFELY_BACKEND") %in% c("", "expectations")){ + data <- + data %>% + select(patient_id, treatment_ccw, tb_postest_treat_ccw, status_ccw_simple, fu_ccw, + treatment_strategy_cat_prim, + treatment_paxlovid_ccw, tb_postest_treat_pax_ccw, + treatment_alt_ccw, tb_postest_treat_alt_ccw, + age, + all_of(covars), + pfizer_most_recent_cov_vac, + az_most_recent_cov_vac, + moderna_most_recent_cov_vac, + study_week) +} data %>% group_by(treatment_ccw) %>% tally() %>% print() +data %>% + group_by(treatment_strategy_cat_prim) %>% + tally() %>% print ################################################################################ # Clone data and add vars outcome, fup and censoring @@ -289,7 +302,7 @@ if (model == "cox"){ data_trt_long %>% add_p_uncens_cox(model_cens_trt, basehaz_trt) } else if (model == "plr"){ - formula_trt_cens <- create_formula_cens_trt_logreg(covars_formula) + formula_trt_cens <- create_formula_cens_trt_logreg(covars_formula, period, contrast) formula_control_cens <- create_formula_cens_control_plr(covars_formula) ############################################################################## # Arm "Control": no treatment within 5 days @@ -317,23 +330,46 @@ if (model == "cox"){ ############################################################################## # Arm "Treatment": treatment within 5 days ############################################################################## - data_trt_surv_last_int_grace <- - data_cloned %>% - filter(arm == "Treatment" & fup >= tstart_plr) - model_cens_trt <- fit_cens_plr(data_trt_surv_last_int_grace, formula_trt_cens) - data_trt_surv_last_int_grace <- - data_trt_surv_last_int_grace %>% - transmute(patient_id, - tstart = tstart_plr, - lag_p_uncens_plr = 1 - predict(model_cens_trt, type = "response")) - data_trt_long <- - data_trt_long %>% - left_join(data_trt_surv_last_int_grace, - by = c("patient_id", "tstart")) %>% - mutate(lag_p_uncens_plr = if_else(is.na(lag_p_uncens_plr), 1, lag_p_uncens_plr)) %>% - group_by(patient_id) %>% - mutate(cmlp_uncens_plr = cumprod(lag_p_uncens_plr)) %>% - ungroup() + if (period == "ba1" & contrast == "all"){ + data_trt_surv_last_int_grace <- + data_cloned %>% # unsplit data + filter(arm == "Treatment" & fup >= tstart_plr) + model_cens_trt <- fit_cens_plr(data_trt_surv_last_int_grace, formula_trt_cens) + data_trt_surv_last_int_grace <- + data_trt_surv_last_int_grace %>% + transmute(patient_id, + tstart = tstart_plr, + lag_p_uncens_plr = 1 - predict(model_cens_trt, type = "response")) + data_trt_long <- + data_trt_long %>% + left_join(data_trt_surv_last_int_grace, + by = c("patient_id", "tstart")) %>% + mutate(lag_p_uncens_plr = if_else(is.na(lag_p_uncens_plr), 1, lag_p_uncens_plr)) %>% + group_by(patient_id) %>% + mutate(cmlp_uncens_plr = cumprod(lag_p_uncens_plr)) %>% + ungroup() + } else{ + data_trt_long_grace <- + data_trt_long %>% + filter(tstart <= tstart_plr) #FIXME should vary depending on how many days are added to fup + model_cens_trt <- fit_cens_plr(data_trt_long_grace, formula_trt_cens) + data_trt_long_grace <- + data_trt_long_grace %>% + transmute(patient_id, + tstart, + p_uncens_plr = 1 - predict(model_cens_trt, type = "response")) + data_trt_long <- + data_trt_long %>% + left_join(data_trt_long_grace, + by = c("patient_id", "tstart")) %>% + mutate(p_uncens_plr = if_else(is.na(p_uncens_plr), 1, p_uncens_plr)) + data_trt_long <- + data_trt_long %>% + group_by(patient_id) %>% + mutate(lag_p_uncens_plr = lag(p_uncens_plr, default = 1), + cmlp_uncens_plr = cumprod(lag_p_uncens_plr)) %>% + ungroup() + } } ################################################################################ diff --git a/analysis/data_ccw/clone_data.R b/analysis/data_ccw/clone_data.R index d189dda..4549d7a 100644 --- a/analysis/data_ccw/clone_data.R +++ b/analysis/data_ccw/clone_data.R @@ -27,25 +27,45 @@ clone_data <- function(data, treatment_window_days = 4){ # [either no treatment or treatment after five days] # --> we keep their observed outcomes and follow-up times treatment_ccw == "Untreated" ~ status_ccw_simple, + # Case 3: patients receive paxlovid within 5 days + # --> they are still alive and followed up until treatment + treatment_paxlovid_ccw == "Treated" ~ 0, + # Case 4: patients receive alternative treatment within 5 days + # --> they are still alive and followed up until treatment + treatment_alt_ccw == "Treated" ~ 0, ), fup = case_when( # Case 1: patients receive treatment within 5 days (scenarios A to E) # --> they are still alive and followed up until treatment - treatment_ccw == "Treated" ~ tb_postest_treat, + treatment_ccw == "Treated" ~ tb_postest_treat_ccw, # Case 2: patients do not receive treatment within 5 days (scenarios F to M) # [either no treatment or treatment after five days] # --> we keep their observed outcomes and follow-up times - treatment_ccw == "Untreated" ~ fu_ccw, + treatment_ccw == "Untreated" & + (treatment_paxlovid_ccw == "Untreated" & treatment_alt_ccw == "Untreated") ~ fu_ccw, + # Case 3: patients receive paxlovid within 5 days + # --> they are still alive and followed up until treatment + treatment_paxlovid_ccw == "Treated" ~ tb_postest_treat_pax_ccw, + # Case 4: patients receive alternative treatment within 5 days + # --> they are still alive and followed up until treatment + treatment_alt_ccw == "Treated" ~ tb_postest_treat_alt_ccw, ), # ADD VARIABLES CENSORING censoring = case_when( - # Case 1: Patients receive treatment within 5 days (scenarios A to E) + # Case 1: patients receive treatment within 5 days (scenarios A to E) # --> they are censored in the control group at time of treatment (see fup) treatment_ccw == "Treated" ~ 1, # Case 2: patients do not receive treatment within 5 days (scenarios F to M) # [either no treatment or treatment after five days] # --> we keep their observed outcomes and follow-up times - treatment_ccw == "Untreated" ~ 0, + treatment_ccw == "Untreated" & + (treatment_paxlovid_ccw == "Untreated" & treatment_alt_ccw == "Untreated") ~ 0, + # Case 3: patients receive paxlovid within 5 days + # --> they are censored in the control group at time of treatment (see fup) + treatment_paxlovid_ccw == "Treated" ~ 1, + # Case 4: patients receive alternative treatment within 5 days + # --> they are censored in the control group at time of treatment (see fup) + treatment_alt_ccw == "Treated" ~ 1, ), ) ################################################################################ @@ -61,15 +81,24 @@ clone_data <- function(data, treatment_window_days = 4){ treatment_ccw == "Treated" ~ status_ccw_simple, # Case 2: Patients die or are lost to follow-up within 5 days # without being treated (scenarios K and L) + # FIXME: check if we need to make sure here that not treated with pax and alt treatment?? # --> we keep their observed outcomes and follow-up times treatment_ccw == "Untreated" & + (treatment_paxlovid_ccw == "Untreated" & treatment_alt_ccw == "Untreated") & fu_ccw <= treatment_window_days ~ status_ccw_simple, # Case 3: Patients do not receive treatment within 5 days # and are still alive or at risk at 5 days (scenarios F-J and M) # --> they don't experience an event and their follow-up time is 5 # days - treatment_ccw == "Untreated" & + treatment_ccw == "Untreated" & + (treatment_paxlovid_ccw == "Untreated" & treatment_alt_ccw == "Untreated") & fu_ccw > treatment_window_days ~ 0, + # Case 4: Patients receive paxlovid within 5 days + # --> they are censored in the treatment group at time of treatment (see fup) + treatment_paxlovid_ccw == "Treated" ~ 0, + # Case 5: Patients receive alternative treatment within 5 days + # --> they are censored in the treatment group at time of treatment (see fup) + treatment_alt_ccw == "Treated" ~ 0, ), fup = case_when( # Case 1: Patients receive treatment within 5 days (scenarios A to E) @@ -79,15 +108,23 @@ clone_data <- function(data, treatment_window_days = 4){ # without being treated (scenarios K and L) # --> we keep their observed outcomes and follow-up times treatment_ccw == "Untreated" & + (treatment_paxlovid_ccw == "Untreated" & treatment_alt_ccw == "Untreated") & fu_ccw <= treatment_window_days ~ fu_ccw, # Case 3: Patients do not receive treatment within 5 days # and are still alive or at risk at 5 days (scenarios F-J and M) # --> they don't experience an event and their follow-up time is 5 # days treatment_ccw == "Untreated" & + (treatment_paxlovid_ccw == "Untreated" & treatment_alt_ccw == "Untreated") & fu_ccw > treatment_window_days ~ treatment_window_days, + # Case 4: Patients receive paxlovid within 5 days + # --> they are censored in the treatment group at time of treatment (see fup) + treatment_paxlovid_ccw == "Treated" ~ tb_postest_treat_pax_ccw, + # Case 5: Patients receive alternative treatment within 5 days + # --> they are censored in the treatment group at time of treatment (see fup) + treatment_alt_ccw == "Treated" ~ tb_postest_treat_alt_ccw, ), - # ADD VARIALBE CENSORING + # ADD VARIABLE CENSORING censoring = case_when( # Case 1: Patients receive treatment within 5 days (scenarios A to E): # --> they are uncensored in the treatment arm and remain at risk of @@ -97,12 +134,20 @@ clone_data <- function(data, treatment_window_days = 4){ # without being treated (scenarios K and L) # --> we keep their follow-up times but they are uncensored treatment_ccw == "Untreated" & + (treatment_paxlovid_ccw == "Untreated" & treatment_alt_ccw == "Untreated") & fu_ccw <= treatment_window_days ~ 0, # Case 3: Patients do not receive treatment within 5 days and are # still alive or at risk at 5 days (scenarios F-J and M): # --> they are considered censored and their follow-up time is 5 days treatment_ccw == "Untreated" & + (treatment_paxlovid_ccw == "Untreated" & treatment_alt_ccw == "Untreated") & fu_ccw > treatment_window_days ~ 1, + # Case 4: Patients receive paxlovid within 5 days + # --> they are censored in the treatment group at time of treatment (see fup) + treatment_paxlovid_ccw == "Treated" ~ 1, + # Case 5: Patients receive alternative treatment within 5 days + # --> they are censored in the treatment group at time of treatment (see fup) + treatment_alt_ccw == "Treated" ~ 1, ), ) ################################################################################ diff --git a/analysis/data_ccw/simplify_data.R b/analysis/data_ccw/simplify_data.R index 6f37fd6..a125a36 100644 --- a/analysis/data_ccw/simplify_data.R +++ b/analysis/data_ccw/simplify_data.R @@ -44,7 +44,7 @@ ccw_simplify_data <- function(data, outcome, contrast, subgrp){ TRUE ~ "Treated") %>% factor(levels = c("Untreated", "Treated")), tb_postest_treat_alt_ccw = if_else( - treatment_paxlovid_prim == "Untreated", + treatment_alt_ccw == "Untreated", NA_real_, tb_postest_treat) ) diff --git a/analysis/data_properties/describe_flow_data_long.R b/analysis/data_properties/describe_flow_data_long.R index 8aa2bf0..0ae9dbd 100644 --- a/analysis/data_properties/describe_flow_data_long.R +++ b/analysis/data_properties/describe_flow_data_long.R @@ -4,6 +4,7 @@ # # This script can be run via an action in project.yaml using three arguments: # - 'period' /in {ba1, ba2} (--> ba1 or ba2 analysis) +# - 'contrast' /in {all, molnupiravir, sotrovimab} (--> mol/sot vs untrt, mol vs untrt, sot vs untrt) # - 'outcome' /in {primary, secondary} (--> primary or secondary outcome) # - 'subgrp' /in {full, haem, transplant} (--> full cohort or haematological subgroup) # @@ -30,8 +31,9 @@ args <- commandArgs(trailingOnly=TRUE) if(length(args)==0){ # use for interactive testing period <- "ba1" + contrast <- "all" outcome <- "primary" - model <- "cox" + model <- "plr" subgrp <- "full" supp <- "main" } else { @@ -40,6 +42,9 @@ if(length(args)==0){ make_option("--period", type = "character", default = "ba1", help = "Period where the analysis is conducted in, options are 'ba1' or 'ba2' [default %default].", metavar = "period"), + make_option("--contrast", type = "character", default = "all", + help = "Contrast of the analysis, options are 'all' (treated vs untreated), 'molnupiravir' (molnupiravir vs untreated) or 'sotrovimab' (sotrovimab vs untreated) [default %default].", + metavar = "contrast"), make_option("--outcome", type = "character", default = "primary", help = "Outcome used in the analysis, options are 'primary' or 'secondary' [default %default].", metavar = "outcome"), @@ -58,6 +63,7 @@ if(length(args)==0){ opt <- parse_args(opt_parser) period <- opt$period + contrast <- opt$contrast outcome <- opt$outcome model <- opt$model subgrp <- opt$subgrp @@ -76,7 +82,7 @@ fs::dir_create(data_properties_long_dir) # 1 Import data ################################################################################ data_dir <- concat_dirs("data", output_dir, model, subgrp, supp) -data_filename <- make_filename("data_long", period, outcome, contrast = "all", model, subgrp, supp, "feather") +data_filename <- make_filename("data_long", period, outcome, contrast, model, subgrp, supp, "feather") input_file <- fs::path(data_dir, data_filename) data_long <- read_feather(input_file) treatment_window_days <- 4 @@ -111,7 +117,9 @@ retrieve_n_flowchart <- function(data_long, arm) { } n_cens <- data_cens %>% filter(outcome == 0) %>% nrow() n_cens_outc <- data_cens %>% filter(outcome == 1) %>% nrow() - n_art_cens <- data_long %>% filter(censoring == 1) %>% nrow() + n_art_cens <- data_long %>% filter(censoring == 1 & treatment_paxlovid_ccw == "Untreated" & treatment_alt_ccw == "Untreated") %>% nrow() + n_art_cens_pax <- data_long %>% filter(censoring == 1 & treatment_paxlovid_ccw == "Treated") %>% nrow() + n_art_cens_alt <- data_long %>% filter(censoring == 1 & treatment_alt_ccw == "Treated") %>% nrow() if (arm == "Control"){ data_fup <- data_long %>% filter(censoring == 0 & max_fup > treatment_window_days_05) } else if (arm == "Treatment"){ @@ -119,7 +127,7 @@ retrieve_n_flowchart <- function(data_long, arm) { } n_fup <- data_fup %>% nrow() n_fup_outc <- data_fup %>% filter(outcome == 1) %>% nrow() - out <- tibble(arm, n_total, n_cens, n_art_cens, n_cens_outc, n_fup, n_fup_outc) + out <- tibble(arm, n_total, n_cens, n_art_cens, n_art_cens_pax, n_art_cens_alt, n_cens_outc, n_fup, n_fup_outc) } ################################################################################ @@ -146,9 +154,9 @@ flowchart_red <- # 5. Save output ################################################################################ filename <- - make_filename("flow_data", period, outcome, contrast = "", model, subgrp = "full", supp, type = "csv") + make_filename("flow_data", period, outcome, contrast, model, subgrp = "full", supp, type = "csv") filename_red <- - make_filename("flow_data_redacted", period, outcome, contrast = "", model, subgrp = "full", supp, type = "csv") + make_filename("flow_data_redacted", period, outcome, contrast, model, subgrp = "full", supp, type = "csv") write_csv(flowchart, fs::path(data_properties_long_dir, filename)) diff --git a/analysis/models_ccw/plr_cens.R b/analysis/models_ccw/plr_cens.R index 4731e6a..9c9f31a 100644 --- a/analysis/models_ccw/plr_cens.R +++ b/analysis/models_ccw/plr_cens.R @@ -1,12 +1,19 @@ # PLR model -create_formula_cens_trt_logreg <- function(covars_formula){ - formula_cens <- paste0("censoring ~ ", - paste0(covars_formula, collapse = " + ")) %>% - as.formula() +create_formula_cens_trt_logreg <- function(covars_formula, period, contrast){ + if (period == "ba1" & contrast == "all"){ # no pax init and alt trt init, + # censoring only happens end of trt window + formula_cens <- paste0("censoring ~ ", + paste0(covars_formula, collapse = " + ")) %>% + as.formula() + } else{ + formula_cens <- paste0("censoring ~ ", + paste0(c("factor(fup)", covars_formula), collapse = " + ")) %>% + as.formula() + } } create_formula_cens_control_plr <- function(covars_formula){ formula_cens <- paste0("censoring ~ ", - paste0(c("ns(fup, 4)", covars_formula), collapse = " + ")) %>% + paste0(c("factor(fup)", covars_formula), collapse = " + ")) %>% as.formula() } ################################################################################ diff --git a/project.yaml b/project.yaml index ab52793..19bd25a 100644 --- a/project.yaml +++ b/project.yaml @@ -1318,8 +1318,28 @@ actions: needs: [perform_ccw_analysis_plr] outputs: moderately_sensitive: - table1: output/plr/data_properties/data_long/flow_data_plr.csv - table2: output/plr/data_properties/data_long/flow_data_redacted_plr.csv + table1: output/plr/data_properties/data_long/flow_data_all_plr.csv + table2: output/plr/data_properties/data_long/flow_data_redacted_all_plr.csv + + describe_flow_data_long_mol_plr: + run: r:latest analysis/data_properties/describe_flow_data_long.R + --model plr + --contrast molnupiravir + needs: [perform_ccw_analysis_plr] + outputs: + moderately_sensitive: + table1: output/plr/data_properties/data_long/flow_data_molnupiravir_plr.csv + table2: output/plr/data_properties/data_long/flow_data_redacted_molnupiravir_plr.csv + + describe_flow_data_long_sot_plr: + run: r:latest analysis/data_properties/describe_flow_data_long.R + --model plr + --contrast sotrovimab + needs: [perform_ccw_analysis_plr] + outputs: + moderately_sensitive: + table1: output/plr/data_properties/data_long/flow_data_sotrovimab_plr.csv + table2: output/plr/data_properties/data_long/flow_data_redacted_sotrovimab_plr.csv describe_flow_data_long_plr_ba2: run: r:latest analysis/data_properties/describe_flow_data_long.R @@ -1328,8 +1348,30 @@ actions: needs: [perform_ccw_analysis_plr_ba2] outputs: moderately_sensitive: - table1: output/plr/data_properties/data_long/ba2_flow_data_plr.csv - table2: output/plr/data_properties/data_long/ba2_flow_data_redacted_plr.csv + table1: output/plr/data_properties/data_long/ba2_flow_data_all_plr.csv + table2: output/plr/data_properties/data_long/ba2_flow_data_redacted_all_plr.csv + + describe_flow_data_long_mol_plr_ba2: + run: r:latest analysis/data_properties/describe_flow_data_long.R + --period ba2 + --model plr + --contrast molnupiravir + needs: [perform_ccw_analysis_plr] + outputs: + moderately_sensitive: + table1: output/plr/data_properties/data_long/ba2_flow_data_molnupiravir_plr.csv + table2: output/plr/data_properties/data_long/ba2_flow_data_redacted_molnupiravir_plr.csv + + describe_flow_data_long_sot_plr_ba2: + run: r:latest analysis/data_properties/describe_flow_data_long.R + --period ba2 + --model plr + --contrast sotrovimab + needs: [perform_ccw_analysis_plr] + outputs: + moderately_sensitive: + table1: output/plr/data_properties/data_long/ba2_flow_data_sotrovimab_plr.csv + table2: output/plr/data_properties/data_long/ba2_flow_data_redacted_sotrovimab_plr.csv ## # # # # # # # # # # # # # # # # # # # ## # # # # # # # # # # # # # # # # # # #