diff --git a/analysis/data_processing.R b/analysis/data_processing.R
index 2c3fd21..d9a1b97 100644
--- a/analysis/data_processing.R
+++ b/analysis/data_processing.R
@@ -46,6 +46,9 @@ df_input <- merge(df_input, household_comp_vars, by = "patient_id")
 
 #create time dependency
 if(cohort == "infants" | cohort == "infants_subgroup") {
+  print(nrow(df_input))
+  print(class(study_start_date))
+  print(class(study_end_date))
   df_input <- df_input %>%
     mutate(
       date = map2(study_start_date, study_end_date, ~seq(.x, .y, by = 30.44))
@@ -176,6 +179,30 @@ if (cohort != "infants" & cohort != "infants_subgroup") {
     )
 }
 
+#define two vaccination categories, one per outcome type; a vaccination only
+#counts as "Yes" when immunity was reached on or before the outcome date
+if (cohort != "infants" && cohort != "infants_subgroup") {
+  df_input <- df_input %>%
+    mutate(
+      #define flu_vaccination_mild
+      #no immunity date -> "No"; immune with no primary outcome date -> "Yes"
+      #(a bare date comparison would return NA for both of those cases)
+      flu_vaccination_mild = factor(if_else(
+        !is.na(flu_vaccination_immunity_date) &
+          (is.na(flu_primary_date) |
+            flu_vaccination_immunity_date <= flu_primary_date),
+        "Yes", "No"
+      )),
+      #define flu_vaccination_severe (same rule against the secondary date)
+      flu_vaccination_severe = factor(if_else(
+        !is.na(flu_vaccination_immunity_date) &
+          (is.na(flu_secondary_date) |
+            flu_vaccination_immunity_date <= flu_secondary_date),
+        "Yes", "No"
+      ))
+    )
+}
+
 #covid vaccination
 if (study_start_date >= covid_prior_vacc_min & cohort != "infants" & cohort != "infants_subgroup") {
   df_input <- df_input %>%
@@ -205,6 +232,30 @@ if (study_start_date >= covid_current_vacc_min & cohort != "infants" & cohort !=
     )
 }
 
+#define two vaccination categories, one per outcome type; a vaccination only
+#counts as "Yes" when immunity was reached on or before the outcome date
+if (study_start_date >= covid_current_vacc_min && cohort != "infants" && cohort != "infants_subgroup") {
+  df_input <- df_input %>%
+    mutate(
+      #define covid_vaccination_mild
+      #no immunity date -> "No"; immune with no primary outcome date -> "Yes"
+      #(a bare date comparison would return NA for both of those cases)
+      covid_vaccination_mild = factor(if_else(
+        !is.na(covid_vaccination_immunity_date) &
+          (is.na(covid_primary_date) |
+            covid_vaccination_immunity_date <= covid_primary_date),
+        "Yes", "No"
+      )),
+      #define covid_vaccination_severe (same rule against the secondary date)
+      covid_vaccination_severe = factor(if_else(
+        !is.na(covid_vaccination_immunity_date) &
+          (is.na(covid_secondary_date) |
+            covid_vaccination_immunity_date <= covid_secondary_date),
+        "Yes", "No"
+      ))
+    )
+}
+
 # #re-level factors so they have reference categories for the regression models
 # df_input <- df_input %>%
 #   mutate(