From 865ecec9161c1f72774ae7f3ebd402e77a90020d Mon Sep 17 00:00:00 2001 From: Milou Sep <46712191+mscsep@users.noreply.github.com> Date: Wed, 12 May 2021 09:38:07 +0200 Subject: [PATCH] initial commit --- LICENSE | 2 +- R/FGT_descriptives.R | 68 +++++ R/FGT_mids_transformations.R | 79 ++++++ R/FPS_LMM_Analyses_call.R | 381 ++++++++++++++++++++++++++ R/FPS_LMM_Assumptions_call.R | 169 ++++++++++++ R/FPS_LMM_Assumptions_source.R | 83 ++++++ R/FPS_LMM_Results_to_Plot.R | 205 ++++++++++++++ R/FPS_LMM_Results_to_Word.R | 69 +++++ R/FPS_LMM_Trialtype.Trial.Condition.r | 82 ++++++ R/FPS_LMM_pool_EMM.r | 148 ++++++++++ R/FPS_LMM_trial.Condition.R | 40 +++ R/FPS_LMM_trialtype.Condition.r | 40 +++ R/FPS_LM_Condition.r | 26 ++ R/FPS_imputation.R | 279 +++++++++++++++++++ R/FPS_mids_Quality.R | 271 ++++++++++++++++++ README.md | 52 ++++ SAM_FGT.Rproj | 13 + data/README.md | 1 + processed_data/README.md | 1 + results/README.md | 1 + 20 files changed, 2009 insertions(+), 1 deletion(-) create mode 100644 R/FGT_descriptives.R create mode 100644 R/FGT_mids_transformations.R create mode 100644 R/FPS_LMM_Analyses_call.R create mode 100644 R/FPS_LMM_Assumptions_call.R create mode 100644 R/FPS_LMM_Assumptions_source.R create mode 100644 R/FPS_LMM_Results_to_Plot.R create mode 100644 R/FPS_LMM_Results_to_Word.R create mode 100644 R/FPS_LMM_Trialtype.Trial.Condition.r create mode 100644 R/FPS_LMM_pool_EMM.r create mode 100644 R/FPS_LMM_trial.Condition.R create mode 100644 R/FPS_LMM_trialtype.Condition.r create mode 100644 R/FPS_LM_Condition.r create mode 100644 R/FPS_imputation.R create mode 100644 R/FPS_mids_Quality.R create mode 100644 README.md create mode 100644 SAM_FGT.Rproj create mode 100644 data/README.md create mode 100644 processed_data/README.md create mode 100644 results/README.md diff --git a/LICENSE b/LICENSE index 58f1a5f..5a02a87 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2021 Milou Sep +Copyright (c) 2020 Milou Sep Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/R/FGT_descriptives.R b/R/FGT_descriptives.R new file mode 100644 index 0000000..7b48ca8 --- /dev/null +++ b/R/FGT_descriptives.R @@ -0,0 +1,68 @@ +# Descriptive statistics of the data from the Fear Generalization Task (FGT) in the SAM study +# Written by Milou Sep. + +# load packages +library(dplyr) +library(Rmisc) # For Mean and CI calculation of raw data +library(haven) # to load SPSS file in r + +# Shock Intensities ------------------------------------------------------- +# Load FGT data +FGT_batch <- read.csv2("data/SAM_FGT.csv", na.strings = c("NaN","5555","8888","9999")) +# load shock amperage values +FGT_SWU <- read.csv2("data/SAM_FGT_Amperage.csv") + +# Select only participants that completed the FGT (n=117) +FGT_participants <- FGT_batch$subjName +# Changes characters to patterns for grepl(). 
Info on matching multiple patterns https://stackoverflow.com/questions/6947587/matching-multiple-patterns +FGT_pattern<-paste(FGT_participants, collapse ="|") +FGT_SWU <- FGT_SWU[grepl(FGT_pattern, as.character(FGT_SWU$Participantnummer)),] + +# Calculate mean, standard deviation, range of shock intensities +summary(FGT_SWU$`Stroomsterkte..mA.`) # mean & range +sd(FGT_SWU$`Stroomsterkte..mA.`) # standard deviation +# Calculate means CI / Condition +SWU.Mean.Condition.CI <- group.CI(`Stroomsterkte..mA.`~Conditie, FGT_SWU, ci = 0.95) +oneway.test(`Stroomsterkte..mA.`~Conditie, FGT_SWU) # NS + + +# Count missing FPS data per type missing (for paper) ---------------------------- +# Note Missing codes (provided with Matlab code): +# 5555 % Digital registration error, message on screen & MissingValue 5555. +# 8888 % Excessive baseline activity. (More than 2 SD deviation) +# 9999 % Latency onset not valid (Note, this only affects onset scores, so not present in magnitude variables) + +FGT_batch_count.missing <- read.csv2("data/SAM_FGT.csv") +Umag_for_missing <- subset.data.frame(FGT_batch_count.missing, select = c(grep("Umag", names(FGT_batch_count.missing)))) +# Number of missing values in original data +n.missing.Technical<-sum(Umag_for_missing == '5555') #= 798 +n.missing.Noise<-sum(Umag_for_missing == '8888') #= 256 +# Total Number of observations +n.observations<-nrow(Umag_for_missing)*ncol(Umag_for_missing) +# percent.missing +(n.missing.Technical/n.observations)*100 # = 7.105 +(n.missing.Noise/n.observations)*100 # = 2.279 % +# 0-responses +n.nulltesponses<-sum(Umag_for_missing == "0") #= 240 +(n.nulltesponses/n.observations)*100 # 2.137 % + + +# FGT Contingency --------------------------------------------------------- +# load data +SAM_questionnaires <- read_sav("data/SAM_questionnaires.sav") # Questionnaires +SAM_versions <- read.csv("data/SAM_Codes_Task_Protocol_Versions.csv") # Information on task versions +# select required variables +SAM_questionnaires %>% select(SubjectNumber,Condition,shock_indicator)->SAM_questionnaires2 # NOTE, 59 missing values in 'shock indicator', because this question was added to the questionnaire later. +SAM_versions %>% select(SubjectNumber,FGTversion)->SAM_versions2 +# merge information +full_join(SAM_versions2, SAM_questionnaires2, by="SubjectNumber")->FGT_contingency +# Create variable to indicate if threat context was identified correctly. +FGT_contingency %>% mutate(Correct_FGT_contingency = case_when( + shock_indicator == FGTversion ~ 1, # correct response + shock_indicator != FGTversion ~ 0 # incorrect response +))-> FGT_contingency +# Differences between experimental groups? 
+group.CI(Correct_FGT_contingency~Condition, FGT_contingency, ci = 0.95) +oneway.test(Correct_FGT_contingency~Condition, FGT_contingency) # NS +# save participants with correct responses +FGT_contingency%>% filter(Correct_FGT_contingency == 1) %>% select(SubjectNumber) %>% saveRDS(.,"processed_data/participants.for.contingency.sensitivity.analyses.rda") diff --git a/R/FGT_mids_transformations.R b/R/FGT_mids_transformations.R new file mode 100644 index 0000000..e4f9888 --- /dev/null +++ b/R/FGT_mids_transformations.R @@ -0,0 +1,79 @@ +# Functions for the transformation and manipulation of mids objects in the analysis of the data from the Fear Generalization Task (FGT) in the SAM study +# written by Milou Sep + +# Info on how to perform manipulations/calculations on midsobject: +# https://stackoverflow.com/questions/26667162/perform-operation-on-each-imputed-dataset-in-rs-mice + +library(dplyr) + +# Function to calculate Means on subset mids object data (bv early trials only..) +means_EML_2way <- function(data_subset){ + data_subset.means<-aggregate(data_subset$FPS, + by=list(data_subset$.imp, data_subset$pp, data_subset$Condition, data_subset$trialtype), + FUN=mean) + names(data_subset.means)<-c(".imp", "pp", "Condition", "trialtype", "FPS") + # Some checks + print(unique(data_subset.means$Condition)) + print(unique(data_subset.means$pp)) + print(unique(data_subset.means$.imp)) + + data_subset.means.mids <- as.mids(data_subset.means) # Make mids. + return(data_subset.means.mids) +} + +# Function to log-transform FPS data if necessary (checked in Assumption Checks) +log.transform.mids <- function(data_set){ + data_set.long<-complete(data_set, action = "long", include = T) # Change mids to long format + data_set.long$FPS <- log(data_set.long$FPS + 1) # Add 1 to deal with 0-responses + log.mids.dataset <- as.mids(data_set.long) + return(log.mids.dataset) +} + + +# Select one imputed dataset for testing (Note function returns dataframe!) +only_one_imputation <- function(data_mids, imp){ + # 1) Mids to dataframe + data_mids.long <- complete(data_mids, action = "long", include = T) + tibble(data_mids.long) + # 2) Perform manipulations: + data_mids.long %>% filter(.imp == imp) ->x + return(x) +} + + +# For sensitivity analyses ------------------------------------------------ + +# Sensitivity analyses to check the effect of potential influential participants: +remove.influential.points <- function(data_mids, influentialpoints){ + # Note numbers should be assigned to influential points e.g. c(1,2,3) + # Mids to dataframe + data_mids.long <- complete(data_mids, action = "long", include = T) + # Perform manipulations: search & exclude rows from influential points (or participants) + indices <- which(data_mids.long$pp %in% influentialpoints) + data_mids.long.noinfl <- data_mids.long[-c(indices),] + # Back to mids. 
+ data_mids.noinfl <- as.mids(data_mids.long.noinfl) + return(data_mids.noinfl) +} + +# Function to change factor Trial and/or Trialtype to continuous variables +Within.continuous.mids <- function(data_set){ + # This was recommended for analyses of fear gradients by https://www.sciencedirect.com/science/article/pii/S0005791618300612 & http://www.frontiersin.org/Quantitative_Psychology_and_Measurement/10.3389/fpsyg.2015.00652/abstract + data_set.long<-complete(data_set, action = "long", include = T) # Change mids to long format + # manipulations: + data_set.long$trialtype <- as.numeric(data_set.long$trialtype) + data_set.long$trial <- as.numeric(data_set.long$trial) + str(data_set.long) + # transform back to mids + mids.dataset <- as.mids(data_set.long) + return(mids.dataset) +} + +# Sensitivity analyses on participants that detected the threat stimulus correctly (according to self-report) +Sensitivity_contingency <- function(midsobject){ + long_df<-complete(midsobject, action = "long", include = T) + pp_to_include <- which(long_df$pp %in% pp_Sensitivity$X1) + long_df_included <- long_df[c(pp_to_include),] + mids_included <- as.mids(long_df_included) + return(mids_included) +} \ No newline at end of file diff --git a/R/FPS_LMM_Analyses_call.R b/R/FPS_LMM_Analyses_call.R new file mode 100644 index 0000000..ca47633 --- /dev/null +++ b/R/FPS_LMM_Analyses_call.R @@ -0,0 +1,381 @@ +#' --- +#' title: "LMM Analyses of unstandardized Fear Potentiated Startle responses to Cue, Context, ITI and Habituation probes (Learning D1 and Memory D2) in the Fear Generalization Task (FGT) in the SAM study" +#' author: "Milou Sep" +#' output: +#' html_document: default +#' --- + +# some information on analyses of imputed data: http://web.maths.unsw.edu.au/~dwarton/missingDataLab.html +#' # FGT "fear conditioning" specific analysis info: +#+ Section, include = FALSE +# * separate tasks phases (separate analyses) +# - day 1 (learning) and day 2 (memory) +# * outcome measures in FGT (separate analyses): +# - startle probe habituation (=hab) +# - context habituate [only day 1] (=habctx, Trialtype: threat, safe & new) +# - cue & context learning/memory (=cue and context; Trialtype: threat, safe & new; Startle probetypes: cue vs context) +# - inter-trial interval (=ITI) + +# Loading required packages ----------------------------------------------- +library(mice); library(lme4); library(miceadds) + +# Loading imputed data ---------------------------------------------------- +load("processed_data/Umag.mids.28.01.19.rda") # imputed data: 1) combined Mids object, and day 1 & day 2 Mids subsets. +# Note These files contain separate sorted Mids objects for cue and context, habctx, hab (per day). Note habctx is only measured on day1. +# Note For ITI on day 1 and day 2 are the imputed means analyzed (because imputation quality of individual trials was insufficient, quality of the imputed means was good). 
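
# Illustrative sketch (not part of the original pipeline): the general
# mice + lme4 pattern used throughout this script, shown on simulated data so
# it runs stand-alone. All names below (demo_dat, demo_mids, demo_full,
# demo_reduced) are hypothetical; the real analyses use the Umag.mids subsets
# loaded above, m = 100 imputations, and the comparison functions sourced
# below. pool.compare() mirrors this script; more recent mice releases offer
# D1() as the equivalent pooled Wald test.
library(mice); library(lme4)   # already loaded at the top of this script
set.seed(123)
demo_dat <- data.frame(pp        = factor(rep(1:30, each = 4)),
                       trialtype = factor(rep(c("threat", "safe", "new", "new"), 30)))
demo_dat$FPS <- 50 + rnorm(30, sd = 6)[as.integer(demo_dat$pp)] + rnorm(120, sd = 8)
demo_dat$FPS[sample(120, 12)] <- NA                     # ~10% missing startle responses
demo_mids <- mice(demo_dat, m = 5, printFlag = FALSE)   # m = 100 in the actual analyses
demo_full    <- with(demo_mids, lmer(FPS ~ 1 + trialtype + (1 | pp), REML = FALSE))
demo_reduced <- with(demo_mids, lmer(FPS ~ 1 + (1 | pp), REML = FALSE))
pool.compare(demo_full, demo_reduced, method = "wald")  # pooled Wald test for the trialtype term
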
+ +# Source analyses functions ----------------------------------------------- +source("R/FPS_LMM_trialtype.trial.condition.r") +source("R/FPS_LMM_trialtype.condition.r") +source("R/FPS_LMM_trial.condition.R") # For analyzes of responses to Habituation probes (on d1 and d2) +source("R/FPS_LM_condition.r") # For analyses mean ITI (D1 & D2) +source("R/FPS_LMM_pool_EMM.r") # Contains 1) RG's function to pool LMER estimated marginal means (EMM) from imputed datasets (used to followup "Trialtype" Effects in early-mid-late epochs) and 2) a function to plot mean epochs of follow-up analyses +# Load data export function +source("R/FPS_LMM_Results_to_Word.R") # Function to export results in word-friendly format +# Source functions for data transformations mids objects +source("R/FGT_mids_transformations.R") + +# Sensitivity analyses with participants that detected the threat stimulus correctly (according to self-report). Note this information was not collected for the complete sample. +# These sensitivity analyses were just for exploration (not published). +readRDS("processed_data/participants.for.contingency.sensitivity.analyses.rda")->pp_Sensitivity # Data in processed_data folder (created with "FGT_descriptive.R") +pp_Sensitivity$SubjectNumber <- gsub("SMGH","", pp_Sensitivity$SubjectNumber) # Remove "SMGH" before participant number +pp_Sensitivity$SubjectNumber <-as.factor(as.numeric(pp_Sensitivity$SubjectNumber)) # Change to numeric and than to factor (=same as mids) +str(pp_Sensitivity$SubjectNumber) # to check + + +# Call of Analyses scripts ------------------------------------------------ + +# Day 1: Learning / Acquisition phase ------------------------------------- + +# Habituation (Noise Alone) FPS (acquisition) --------------------------------- +# Note LMER assumptions checked after imputation (in 100 datasets) and satisfied after log(FPS) +A_Hab_Trials.log <- log.transform.mids(Umag.mids$Acq_HAB_Trials) +Acq_Hab_2way <- FGT_LMER_trial.Condition(dataset=A_Hab_Trials.log) +Export.FGT.Table(FGT_Results_Table = Acq_Hab_2way, TableName = "Umag.Hab.Acq.Trials", file.export = T) # Note this function is in 'FGT_Results_to_Word.R' +# Main effect trial, no interactions (No follow-up analyses planned/needed). + +# Sensitivity analyses to check the effect of potential influential participants, identified via Assumption Checks: pp 98 and pp 100 +A_Hab_Trials.log.noinf <- remove.influential.points(data_mids=A_Hab_Trials.log, influentialpoints=c(98,100)) +Acq_Hab_2way_noinf <- FGT_LMER_trial.Condition(dataset=A_Hab_Trials.log.noinf) +Export.FGT.Table(FGT_Results_Table = Acq_Hab_2way_noinf, TableName = "Umag.Hab.Acq.noinf.Trials", file.export = F) +# Conclusion: no significant change in result. + + +# Cued FPS (acquisition) -------------------------------------------------- +# Note LMER assumptions checked after imputation (in 100 datasets) and satisfied after log(FPS) +A_Cue_Trials.log <- log.transform.mids(Umag.mids$Acq_Cued_Trials) +Acq_Cued_3way <- FGT_LMER_Trialtype.Trial.Condition(dataset=A_Cue_Trials.log) +Export.FGT.Table(FGT_Results_Table = Acq_Cued_3way, TableName = "Umag.Cue.Acq.Trials", file.export = T) +# Interaction effect Trialtype * Trial (and main effect trialtype & trial) + +# Sensitivity analyses to check the effect of potential influential participants, identified via Assumption Checks: pp 15, 97 (and 90?) 
+A_Cue_Trials.log.noinf <- remove.influential.points(data_mids=A_Cue_Trials.log, influentialpoints=c(15,97)) +Acq_Cued_3way_noinf <- FGT_LMER_Trialtype.Trial.Condition(dataset=A_Cue_Trials.log.noinf) +Export.FGT.Table(FGT_Results_Table = Acq_Cued_3way_noinf, TableName = "Umag.Cue.Acq.noinf.Trials", file.export = F) +# Conclusion: no significant change in result. + +# * Follow-up analyses: decompose factor "Trial" by analyzing means of early & late trials separately +A_Cue<-complete(A_Cue_Trials.log, action = "long", include = T) # Change mids to long format +unique(A_Cue$trial) # 12 trials + +# Create Early Trials mids object +A_Cue_Early <- subset(A_Cue, trial %in% c(1,2,3,4)) +unique(A_Cue_Early$trial) # to check +A_Cue_Early_M <- means_EML_2way(A_Cue_Early) +# Analyze Early Trials +Acq_Cued_2way.Early <- FGT_LMER_trialtype.Condition(dataset=A_Cue_Early_M) +Export.FGT.Table(FGT_Results_Table = Acq_Cued_2way.Early, TableName = "Umag.Cue.Acq.Early", file.export = T) +# No significant effects. + +# Create Mid Trials mids object +A_Cue_Mid <- subset(A_Cue, trial %in% c(5,6,7,8)) +unique(A_Cue_Mid$trial) +A_Cue_Mid_M <- means_EML_2way(A_Cue_Mid) +# Analyze Mid Trials +Acq_Cued_2way.Mid <- FGT_LMER_trialtype.Condition(dataset=A_Cue_Mid_M) +Export.FGT.Table(FGT_Results_Table = Acq_Cued_2way.Mid, TableName = "Umag.Cue.Acq.Mid", file.export = T) +# No significant effects. + +# Create Late Trials mids object +A_Cue_Late <- subset(A_Cue, trial %in% c(9,10,11,12)) +unique(A_Cue_Late$trial) +A_Cue_Late_M <- means_EML_2way(A_Cue_Late) +# Analyze Late Trials +Acq_Cued_2way.Late <- FGT_LMER_trialtype.Condition(dataset=A_Cue_Late_M) +Export.FGT.Table(FGT_Results_Table = Acq_Cued_2way.Late, TableName = "Umag.Cue.Acq.Late", file.export = T) +# Significant effect Trialtype + +# * Follow-up Main effect Trialtype (to determine direction of effect): #### +# To date, there are no statistical methods implemented in the major software packages to pool the results from post-hoc tests on EMMs of imputed datasets. +# Therefore, differences in EMMs could not be tested statistically in the current study. Also see https://www.tandfonline.com/doi/abs/10.1080/00273171.2013.855890 +# The EMM were pooled to get an indication for the direction of the effect. + +# Copy Relevant Model from LMER script +A_Cue_Main_NO_2way<-with(expr=lmer(FPS~1+Condition+trialtype+(1|pp), control=lmerControl(optimizer="bobyqa")),data = A_Cue_Late_M, REML=F) +# Call function to pool EMM: +Acq_Cued_2way.Late.followupTrialtype <-pool.means.trialtype(model=A_Cue_Main_NO_2way, m=100, phase="ACQ") # Enter larger model from the specific comparison that is followed up. In this case Full main effects model. 
+Acq_Cued_2way.Late.followupTrialtype # Note Qbar = pooled means (overall point estimate) +# Export results to table: +Export.FGT.FollowupTrialtype(FGT_FollowupResults = Acq_Cued_2way.Late.followupTrialtype, + TableName = "Umag.Cue.Acq.Late_FU.trialtype", file.export = T) +# No further follow-up analyses planned/needed + + +# Contextual FPS (acquisition) -------------------------------------------- +# Note LMER assumptions checked after imputation (in 100 datasets) and satisfied after log(FPS) +A_Ctx_Trials.log <- log.transform.mids(Umag.mids$Acq_Context_Trials) +# A_Ctx_Trials.log<-Sensitivity_contingency(A_Ctx_Trials.log) # Same results +Acq_Ctx_3way <- FGT_LMER_Trialtype.Trial.Condition(dataset=A_Ctx_Trials.log) +Export.FGT.Table(FGT_Results_Table = Acq_Ctx_3way, TableName = "Umag.Ctx.Acq.Trials", file.export = T) +# Main trialtype & trial + +# No Sensitivity analyses to check effect influential participants: No influential points were identified + +# * Follow-up analyses 1: overall mean effect trialtype (Note these EMM are averaged over Conditions & Trials!) +Main_NO_2ways_ACQCTX<-with(expr=lmer(FPS~1+Condition+trialtype+trial+(1|pp), control=lmerControl(optimizer="bobyqa")),data = A_Ctx_Trials.log, REML=F) +ACQ_CTX_2way.followupTrialtype<- pool.means.trialtype(model=Main_NO_2ways_ACQCTX, m=100, phase='ACQ') +Export.FGT.FollowupTrialtype(FGT_FollowupResults=ACQ_CTX_2way.followupTrialtype, + TableName = "Umag.Ctx.ACQ.ALL_FU.trialtype", file.export = T) +# The effects plotted +Trialtype.PlotD2(ACQ_CTX_2way.followupTrialtype, rownames(ACQ_CTX_2way.followupTrialtype), ACQ_CTX_2way.followupTrialtype$Qbar) + +# * Follow-up analyses 2: decompose factor "Trial" by analyzing means of early & late trials separately +A_Ctx<-complete(A_Ctx_Trials.log, action = "long", include = T) # Change mids to long format +unique(A_Ctx$trial) # 6 trials + +# Create Early Trials mids object +A_Ctx_Early <- subset(A_Ctx, trial %in% c(1,2)) +unique(A_Ctx_Early$trial) +A_Ctx_Early_M <- means_EML_2way(A_Ctx_Early) +# Analyze Early Trials +Acq_Ctx_2way.Early <- FGT_LMER_trialtype.Condition(dataset=A_Ctx_Early_M) +Export.FGT.Table(FGT_Results_Table = Acq_Ctx_2way.Early, TableName = "Umag.Ctx.Acq.Early", file.export = T) +# no significant effects + +# Create Mid Trials mids object +A_Ctx_Mid <- subset(A_Ctx, trial %in% c(3,4)) +unique(A_Ctx_Mid$trial) +A_Ctx_Mid_M <- means_EML_2way(A_Ctx_Mid) +# Analyze Mid Trials +Acq_Ctx_2way.Mid <- FGT_LMER_trialtype.Condition(dataset=A_Ctx_Mid_M) +Export.FGT.Table(FGT_Results_Table = Acq_Ctx_2way.Mid, TableName = "Umag.Ctx.Acq.Mid", file.export = T) +# no significant effects + +# Create Late Trials mids object +A_Ctx_Late <- subset(A_Ctx, trial %in% c(5,6)) +unique(A_Ctx_Late$trial) +A_Ctx_Late_M <- means_EML_2way(A_Ctx_Late) +# Analyze Late Trials +Acq_Ctx_2way.Late <- FGT_LMER_trialtype.Condition(dataset=A_Ctx_Late_M) +Export.FGT.Table(FGT_Results_Table = Acq_Ctx_2way.Late, TableName = "Umag.Ctx.Acq.Late", file.export = T) +# no significant effects + +# No further follow-up analyses planned/needed + + +# ITI FPS (acquisition) -------------------------------------------- +# Note LMER assumptions checked after imputation (in 100 datasets) and satisfied after log(FPS) +A_ITI_Mean.log <-log.transform.mids(Umag.mids$Acq_ITI_Means) +Acq_ITI_LM <- FGT_LM.Condition(dataset=A_ITI_Mean.log) +Export.FGT.Table(FGT_Results_Table = Acq_ITI_LM, TableName = "Umag.ITI.Acq.Mean", file.export = T) +# no significant effects + +# Sensitivity analyses to check the effect of potential influential 
participants, identified via Assumption Checks: p 28, 107 (76 in most datasets) +A_ITI_Mean.log.noinf <- remove.influential.points(data_mids=A_ITI_Mean.log, influentialpoints=c(28,107,76)) +Acq_ITI_noinf <- FGT_LM.Condition(dataset=A_ITI_Mean.log.noinf) +Export.FGT.Table(FGT_Results_Table = Acq_ITI_noinf, TableName = "Umag.ITI.Acq.noinf.Mean", file.export = T) +# Conclusion: significant change in result. Significant effect of condition following exclusion (28, 107, 76). Same for the exclusion of 28 and 107 alone. This discrepancy is described in the paper. + +# Follow-up main effect condition: +Main_ITI_Acq<-with(expr=lm(FPS~1+Condition),data = A_ITI_Mean.log.noinf) +ACQ_ITI.followupCondition<- pool.means.condition(model=Main_ITI_Acq, m=100) +Export.FGT.FollowupCondition(FGT_FollowupResults=ACQ_ITI.followupCondition, + TableName = "Umag.ITI.ACQ.FU.Condition", file.export = T) + + +# Day 2: Testing / Generalization phase ----------------------------------- + +# Habituation (Noise Alone) FPS (test) --------------------------------- +# Note LMER assumptions checked after imputation (in 100 datasets) and satisfied after log(FPS) +T_Hab_Trials.log <- log.transform.mids(Umag.mids$Test_HAB_Trials) +#T_Hab_Trials.log<-Sensitivity_contingency(T_Hab_Trials.log) # Same results +Test_Hab_2way <- FGT_LMER_trial.Condition(dataset=T_Hab_Trials.log) +Export.FGT.Table(FGT_Results_Table = Test_Hab_2way, TableName = "Umag.Hab.Test.Trials", file.export = T) +# No significant effects (No follow-up analyses planned/needed) + +# # Sensitivity analyses to check the effect of potential influential participants, identified via Assumption Checks: p24 +T_Hab_Trials.log.noinf <- remove.influential.points(data_mids=T_Hab_Trials.log, influentialpoints=c(24)) +Test_Hab_2way.noinf <- FGT_LMER_trial.Condition(dataset=T_Hab_Trials.log.noinf) +Export.FGT.Table(FGT_Results_Table = Test_Hab_2way.noinf, TableName = "Umag.Hab.Test.noinf", file.export = T) +# Conclusion: no significant change in result. + + +# Cued FPS Test ----------------------------------------------------------- +# Note LMER assumptions checked after imputation (in 100 datasets) and satisfied after log(FPS) +T_Cued_Trials.log <- log.transform.mids(Umag.mids$Test_Cued_Trials) +# T_Cued_Trials.log<-Sensitivity_contingency(T_Cued_Trials.log) # Same results +Test_Cued_3way <- FGT_LMER_Trialtype.Trial.Condition(dataset=T_Cued_Trials.log) +Export.FGT.Table(FGT_Results_Table = Test_Cued_3way, + TableName = "Umag.Cue.Test.Trials", file.export = T) +# Main effect Trialtype & Main effect Trial + +# Sensitivity analyses to check the effect of potential influential participants, identified via Assumption Checks: pp 26, 127 (and 100?) +T_Cued_Trials.log.noinf <- remove.influential.points(data_mids=T_Cued_Trials.log, influentialpoints=c(26, 127)) +Test_Cued_3way.noinf <- FGT_LMER_Trialtype.Trial.Condition(dataset=T_Cued_Trials.log.noinf) +Export.FGT.Table(FGT_Results_Table = Test_Cued_3way.noinf, TableName = "Umag.Cue.Test.noinf", file.export = F) +# Conclusion: no significant change in result. 
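
# Illustrative sketch of the idea behind the pooled EMM follow-ups used in this
# script: fit the model in every completed dataset, take the estimated marginal
# means, and combine them with Rubin's rules (pooled mean Qbar, within- plus
# between-imputation variance). This is a simulated-data illustration using the
# emmeans package, NOT the pool.means.trialtype()/pool.means.condition()
# implementation in FPS_LMM_pool_EMM.r; all object names are hypothetical and
# the 1.96 interval is a rough normal approximation.
library(mice); library(lme4); library(emmeans)
set.seed(42)
demo_emm <- data.frame(pp        = factor(rep(1:30, each = 2)),
                       Condition = factor(rep(c("A", "B", "C"), each = 20)))
demo_emm$FPS <- 50 + rnorm(30, sd = 4)[as.integer(demo_emm$pp)] + rnorm(60, sd = 2)
demo_emm$FPS[sample(60, 6)] <- NA
demo_emm_mids <- mice(demo_emm, m = 5, printFlag = FALSE)
demo_m <- demo_emm_mids$m
emm_per_imp <- lapply(seq_len(demo_m), function(i) {
  fit <- lmer(FPS ~ 1 + Condition + (1 | pp), data = complete(demo_emm_mids, i), REML = FALSE)
  as.data.frame(emmeans(fit, ~ Condition))              # per-imputation EMMs (emmean, SE, ...)
})
est  <- sapply(emm_per_imp, `[[`, "emmean")              # conditions x imputations
Qbar <- rowMeans(est)                                    # pooled point estimates
Ubar <- rowMeans(sapply(emm_per_imp, `[[`, "SE")^2)      # mean within-imputation variance
B    <- apply(est, 1, var)                               # between-imputation variance
Total.var <- Ubar + (1 + 1/demo_m) * B                   # Rubin's rules total variance
data.frame(Condition = emm_per_imp[[1]]$Condition, Qbar, Total.var,
           lower.95 = Qbar - 1.96 * sqrt(Total.var),
           upper.95 = Qbar + 1.96 * sqrt(Total.var))
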
+ +# Sensitivity analyses to check the influence of factor vs continuous variable "trailtype": +# Continuous analyses suggested by (http://www.frontiersin.org/Quantitative_Psychology_and_Measurement/10.3389/fpsyg.2015.00652/abstract) & https://www.sciencedirect.com/science/article/pii/S0005791618300612 +T_Cued_Trials.log_Trialtypecontinuous<-Within.continuous.mids(T_Cued_Trials.log) +Test_Cued_3way_TTcontinuous <- FGT_LMER_Trialtype.Trial.Condition(dataset=T_Cued_Trials.log_Trialtypecontinuous) +Export.FGT.Table(FGT_Results_Table = Test_Cued_3way_TTcontinuous, TableName = "Umag.Cue.Test.Trials_TTcontinuous", file.export = F) +# Conclusion: no significant change in result. + +# * Follow-up analyses 2: decompose factor "Trial" by analyzing means of early & late trials separately #### +T_Cue<-complete(T_Cued_Trials.log, action = "long", include = T) # Change mids to long format +unique(T_Cue$trial) # 6 trials + +# Create Early Trials mids object +T_Cue_Early <- subset(T_Cue, trial %in% c(1,2)) +unique(T_Cue_Early$trial) +T_Cue_Early_M <- means_EML_2way(T_Cue_Early) +# Analyze Early Trials +Test_Cue_2way.Early <- FGT_LMER_trialtype.Condition(dataset=T_Cue_Early_M) +Export.FGT.Table(FGT_Results_Table = Test_Cue_2way.Early, TableName = "Umag.Cue.Test.Early", file.export = T) +# Main effect Trialtype + +# Follow-up Main effect trialtype (to determine direction of effect): +# Copy Relevant Model from LMER script: +T_CUE_Main_NO_2way.EARLY<-with(expr=lmer(FPS~1+Condition+trialtype+(1|pp), control=lmerControl(optimizer = 'bobyqa')),data = T_Cue_Early_M, REML=F) +# Call function to pool EMM: +Test_Cue_2way.Early.followupTrialtype <-pool.means.trialtype(model=T_CUE_Main_NO_2way.EARLY, m=100, phase="TEST") # Enter larger model from the specific comparison that is followed up. In this case Full main effects model. +Test_Cue_2way.Early.followupTrialtype # Note Qbar = pooled means (overall point estimate) +# Export results to table +Export.FGT.FollowupTrialtype(FGT_FollowupResults = Test_Cue_2way.Early.followupTrialtype, + TableName = "Umag.Cue.Test.Early_FU.trialtype", file.export = T) +# The effects plotted +Trialtype.PlotD2(Test_Cue_2way.Early.followupTrialtype, rownames(Test_Cue_2way.Early.followupTrialtype), Test_Cue_2way.Early.followupTrialtype$Qbar) + + +# Create Mid Trials mids object +T_Cue_Mid <- subset(T_Cue, trial %in% c(3,4)) +unique(T_Cue_Mid$trial) +T_Cue_Mid_M <- means_EML_2way(T_Cue_Mid) +# Analyze Mid Trials +Test_Cue_2way.Mid <- FGT_LMER_trialtype.Condition(dataset=T_Cue_Mid_M) +Export.FGT.Table(FGT_Results_Table = Test_Cue_2way.Mid, TableName = "Umag.Cue.Test.Mid", file.export = T) +# Main effect Trialtype + +# Follow-up Main effect trialtype (to determine direction of effect): +# Copy Relevant Model from LMER script: +T_CUE_Main_NO_2way.MID<-with(expr=lmer(FPS~1+Condition+trialtype+(1|pp), control=lmerControl(optimizer = 'bobyqa')),data = T_Cue_Mid_M, REML=F) +# Call function to pool EMM: +Test_Cue_2way.MID.followupTrialtype <-pool.means.trialtype(model=T_CUE_Main_NO_2way.MID, m=100, phase="TEST") # Enter larger model from the specific comparison that is followed up. In this case Full main effects model. 
+Test_Cue_2way.MID.followupTrialtype # Note Qbar = pooled means (overall point estimate) +# Export results to table +Export.FGT.FollowupTrialtype(FGT_FollowupResults = Test_Cue_2way.MID.followupTrialtype, + TableName = "Umag.Cue.Test.Mid_FU.trialtype", file.export = T) +# The effects plotted +Trialtype.PlotD2(Test_Cue_2way.MID.followupTrialtype, rownames(Test_Cue_2way.MID.followupTrialtype), Test_Cue_2way.MID.followupTrialtype$Qbar) + + +# Create Late Trials mids object +T_Cue_Late <- subset(T_Cue, trial %in% c(5,6)) +unique(T_Cue_Late$trial) +T_Cue_Late_M <- means_EML_2way(T_Cue_Late) +# Analyze Late Trials +Test_Cue_2way.Late <- FGT_LMER_trialtype.Condition(dataset=T_Cue_Late_M) +Export.FGT.Table(FGT_Results_Table = Test_Cue_2way.Late, TableName = "Umag.Cue.Test.Late", file.export = T) +# Main effect Trialtype + +# Follow-up Main effect trialtype (to determine direction of effect): +# Copy Relevant Model from LMER script: +T_CUE_Main_NO_2way.LATE<-with(expr=lmer(FPS~1+Condition+trialtype+(1|pp), control=lmerControl(optimizer = 'bobyqa')),data = T_Cue_Late_M, REML=F) +# Call function to pool EMM: +Test_Cue_2way.LATE.followupTrialtype <-pool.means.trialtype(model=T_CUE_Main_NO_2way.LATE, m=100, phase="TEST") # Enter larger model from the specific comparison that is followed up. In this case Full main effects model. +Test_Cue_2way.LATE.followupTrialtype # Note Qbar = pooled means (overall point estimate) +# Export results to table +Export.FGT.FollowupTrialtype(FGT_FollowupResults = Test_Cue_2way.LATE.followupTrialtype, + TableName = "Umag.Cue.Test.Late_FU.trialtype", file.export = T) +# The effects plotted +Trialtype.PlotD2(Test_Cue_2way.LATE.followupTrialtype, rownames(Test_Cue_2way.LATE.followupTrialtype), Test_Cue_2way.LATE.followupTrialtype$Qbar) + + +# Contextual FPS Test ----------------------------------------------------- +# Note LMER assumptions checked after imputation (in 100 datasets) and satisfied after log(FPS) +T_Ctx_Trials.log <-log.transform.mids(Umag.mids$Test_Context_Trials) +#T_Ctx_Trials.log<-Sensitivity_contingency(T_Ctx_Trials.log) # Significant trial*condition interaction, no effect of Condition in follow-up analyses on epochs from early, mid and late trials (see below). No further follow-up. +Test_Ctx_3way <- FGT_LMER_Trialtype.Trial.Condition(dataset=T_Ctx_Trials.log) +Export.FGT.Table(FGT_Results_Table = Test_Ctx_3way, + TableName = "Umag.Ctx.Test.Trials", file.export = T) +# Main effect trial. No interactions (No follow-up analyses planned/needed) + +# Sensitivity analyses to check the effect of potential influential participants, identified via Assumption Checks: pp 20, 31, 132 +T_Ctx_Trials.log.noinf <- remove.influential.points(data_mids=T_Ctx_Trials.log, influentialpoints=c(20, 31, 132)) +Test_Ctx_3way.noinf <- FGT_LMER_Trialtype.Trial.Condition.Workaround.D2.CTX(dataset=T_Ctx_Trials.log.noinf) +Export.FGT.Table(FGT_Results_Table = Test_Ctx_3way.noinf, TableName = "Umag.Ctx.Test.noinf", file.export = F) +# Conclusion: no significant change in result. + +# Sensitivity analyses to check the influence of factor vs continuous variable "trailtype": +T_Ctx_Trials.log_Trialtypecontinuous<-Within.continuous.mids(T_Ctx_Trials.log) +Test_Ctx_3way_TTcontinuous <- FGT_LMER_Trialtype.Trial.Condition(dataset=T_Ctx_Trials.log_Trialtypecontinuous) +Export.FGT.Table(FGT_Results_Table = Test_Ctx_3way_TTcontinuous, TableName = "Umag.Ctx.Test.Trials_TTcontinuous", file.export = F) +# Conclusion: no significant change in result. 
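
# Illustrative sketch of what the factor-vs-continuous "trialtype" sensitivity
# check amounts to conceptually: coding trialtype as numeric replaces the
# separate level means with a single linear generalization gradient. Simulated
# toy data and a single complete dataset, all names hypothetical; the actual
# check runs Within.continuous.mids() on the imputed data and refits the full
# pooled models.
library(lme4)   # already loaded at the top of this script
demo_grad <- expand.grid(pp = factor(1:40),
                         trialtype = factor(c("threat", "safe", "new"),
                                            levels = c("threat", "safe", "new")))
set.seed(7)
demo_grad$FPS <- 60 - 5 * as.numeric(demo_grad$trialtype) +   # linear decrease threat -> safe -> new
  rep(rnorm(40, sd = 5), times = 3) + rnorm(120, sd = 8)      # participant intercepts + noise
fit_factor  <- lmer(FPS ~ 1 + trialtype             + (1 | pp), data = demo_grad, REML = FALSE)
fit_numeric <- lmer(FPS ~ 1 + as.numeric(trialtype) + (1 | pp), data = demo_grad, REML = FALSE)
anova(fit_numeric, fit_factor)  # the extra factor df add little when responses follow a linear gradient
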
+ +# * Follow-up analyses sensitivity analyses contingency: decompose factor "Trial" by analyzing means of early & late trials separately #### +T_Ctx<-complete(T_Ctx_Trials.log, action = "long", include = T) # Change mids to long format +unique(T_Ctx$trial) # 3 trials + +# Create Early Trials mids object +T_Ctx_Early <- subset(T_Ctx, trial %in% c(1)) +unique(T_Ctx_Early$trial) +T_Ctx_Early_M <- means_EML_2way(T_Ctx_Early) +# Analyze Early Trials +Test_Ctx_2way.Early <- FGT_LMER_trialtype.Condition(dataset=T_Ctx_Early_M) +Export.FGT.Table(FGT_Results_Table = Test_Ctx_2way.Early, TableName = "Umag.Ctx.Test.Early", file.export = T) +# No significant effects of condition for trial 1 + +# Create Mid Trials mids object +T_Ctx_Mid <- subset(T_Ctx, trial %in% c(2)) +unique(T_Ctx_Mid$trial) +T_Ctx_Mid_M <- means_EML_2way(T_Ctx_Mid) +# Analyze Mid Trials +Test_Ctx_2way.Mid <- FGT_LMER_trialtype.Condition(dataset=T_Ctx_Mid_M) +Export.FGT.Table(FGT_Results_Table = Test_Ctx_2way.Mid, TableName = "Umag.Ctx.Test.Mid", file.export = T) +# No significant effects of condition for trial 2 + +# Create Late Trials mids object +T_Ctx_Late <- subset(T_Ctx, trial %in% c(3)) +unique(T_Ctx_Late$trial) +T_Ctx_Late_M <- means_EML_2way(T_Ctx_Late) +# Analyze Late Trials +Test_Ctx_2way.Late <- FGT_LMER_trialtype.Condition(dataset=T_Ctx_Late_M) +Export.FGT.Table(FGT_Results_Table = Test_Ctx_2way.Late, TableName = "Umag.Ctx.Test.Late", file.export = T) +# No significant effects of condition for trial 3 (but main effect of trialtype here) + +# Follow-up Main effect trialtype (to determine direction of effect): +# Copy Relevant Model from LMER script: +T_Ctx_Main_NO_2way.LATE<-with(expr=lmer(FPS~1+Condition+trialtype+(1|pp),control=lmerControl(optimizer="bobyqa")),data = T_Ctx_Late_M, REML=F) +# Call function to pool EMM: +Test_Ctx_2way.LATE.followupTrialtype <-pool.means.trialtype(model=T_Ctx_Main_NO_2way.LATE, m=100, phase="TEST") # Enter larger model from the specific comparison that is followed up. In this case Full main effects model. 
+Test_Ctx_2way.LATE.followupTrialtype # Note Qbar = pooled means (overall point estimate) +# Export results to table +Export.FGT.FollowupTrialtype(FGT_FollowupResults = Test_Ctx_2way.LATE.followupTrialtype, + TableName = "Umag.Ctx.Test.Late_FU.trialtype", file.export = T) +# The effects plotted +Trialtype.PlotD2(Test_Ctx_2way.LATE.followupTrialtype, rownames(Test_Ctx_2way.LATE.followupTrialtype), Test_Ctx_2way.LATE.followupTrialtype$Qbar) + + +# ITI FPS (Test) -------------------------------------------- +# Note LMER assumptions checked after imputation (in 100 datasets) and satisfied after log(FPS) +T_ITI_Mean.log <-log.transform.mids(Umag.mids$Test_ITI_Means) +Test_ITI_LM <- FGT_LM.Condition(dataset=T_ITI_Mean.log) +Export.FGT.Table(FGT_Results_Table = Test_ITI_LM, TableName = "Umag.ITI.Test.Mean", file.export = F) +# No significant effects + +# Sensitivity analyses to check the effect of potential influential participants, identified via Assumption Checks: p 76, 107 (88 and 94 in most datasets) +T_ITI_Mean.log.noinf <- remove.influential.points(data_mids=T_ITI_Mean.log, influentialpoints=c(76, 107, 88, 94)) +Test_ITI_LM_noinf <- FGT_LM.Condition(dataset=T_ITI_Mean.log.noinf) +Export.FGT.Table(FGT_Results_Table = Test_ITI_LM_noinf, TableName = "Umag.ITI.Test.noinf.Mean", file.export = T) +# Conclusion: no influence on results (no exclusion of pp) diff --git a/R/FPS_LMM_Assumptions_call.R b/R/FPS_LMM_Assumptions_call.R new file mode 100644 index 0000000..8a411e3 --- /dev/null +++ b/R/FPS_LMM_Assumptions_call.R @@ -0,0 +1,169 @@ +## Script to calls "FPS_LMM_Assumptions_scours.r" multiple times, to check the assumptions for the linear(mixed)model analyses in the data from the Fear Generalization Task (FGT) in the SAM study +# Written by Milou Sep + +## Selection of Random effects in full models: +# Although, factors 'Trial' and 'Trialtype' are both 'within factors' in most datasets below. Random slopes are only added to the full model for subject (+1|pp). +# To compare nested models, random effects need to be the same, therefore all (LMER) models have random effects: +(1|pp). Various random effects were checked, +# but more complex random effects than 1|pp (like 1|pp+1|trialtype or 1|pp+1|trial) cause model convergence problems in some outcome measures. +# We therefore selected the simplest random effects term (note various random effects did not crucially influence model results, i.e. no difference in the effects that reached significance.) + +# Set WD & Load Mids ------------------------------------------------------ +load("processed_data/Umag.mids.28.01.19.rda") +summary(Umag.mids) + +# Description of the variables that are defined prior to each 'assumption check'. Assumptions are checked by rendering the file `FPS_LMM_Assumptions_source.R`. +# dataset_name --> just a character string that is used in the .html output +# Data_list --> a list of datasets. The 1 element is the dataframe with missing values, the following are the 100 imputed datasets +# log_transformation --> can be "yes" or something else. If "yes", a log-transformation of FPS is performed in the source script +# Model_Formula=formula(" ") --> the formula for LM() or LMER() should be added as a character string +# within_factor --> can be 1 (=present) or 0 (=absent). 
Cooks distances are differently calculated for LM() (if no within factor is present) and lmer() (if one or multiple within factors are present) + +###### ACQUISITION ###### + +# Habituation Trials Day 1 ------------------------------------------------- +# Required variables definitions +{dataset_name = 'Hab_trials_D1' +Data_list=c(list(Umag.mids$Acq_HAB_Trials$data), + miceadds::mids2datlist(Umag.mids$Acq_HAB_Trials, X=NULL)) +log_transformation = 'yes' +Model_Formula=formula("FPS~1+Condition*trial+(1|pp)") +within_factor = 1 +# Render Assumption Check script to HTML +rmarkdown::render("R/FPS_LMM_Assumptions_source.R", "html_document", + output_file = paste0('Assumptions_HAB_trials_D1.',date(),'.html')) +}# Show individual influential points +save(influential.case.names, file=paste0('influentialCases_HAB_trials_D1.',date(),'.rda')) +influential.case.names # Show influential participants: +unique(influential.case.names) +mean(unlist(influentialpoints), na.rm=T) # Mean cook's distance of outliers. +rm("influentialpoints", "influential.case.names", "log_transformation") + + +# Cue Trials Day 1 ------------------------------------------------- +# Required variable definitions +{dataset_name = 'Cue_trials_D1' +Data_list=c(list(Umag.mids$Acq_Cued_Trials$data), + miceadds::mids2datlist(Umag.mids$Acq_Cued_Trials, X=NULL)) +log_transformation = 'yes' +Model_Formula=formula("FPS~1+Condition*trial*trialtype+(1|pp)") +within_factor = 1 +# Render Assumption Check script to HTML +rmarkdown::render("R/FPS_LMM_Assumptions_source.R", "html_document", + output_file = paste0('Assumptions_Cue_trials_D1.',date(),'.html')) +}# Show individual influential points +save(influential.case.names, file=paste0('influentialCases_Cue_trials_D1.',date(),'.rda')) +influential.case.names +#unique(influential.case.names) +mean(unlist(influentialpoints), na.rm=T) # Mean cook's distance of outliers. +rm("influentialpoints", "influential.case.names", "log_transformation") + + +# Context Trials Day 1 ------------------------------------------------- +# Required variable definitions +{dataset_name = 'Context_trials_D1' +Data_list=c(list(Umag.mids$Acq_Context_Trials$data), + miceadds::mids2datlist(Umag.mids$Acq_Context_Trials, X=NULL)) +log_transformation = 'yes' +Model_Formula=formula("FPS~1+Condition*trial*trialtype+(1|pp)") +within_factor = 1 +# Render Assumption Check script to HTML +rmarkdown::render("R/FPS_LMM_Assumptions_source.R", "html_document", + output_file = paste0('Assumptions_Context_trials_D1.',date(),'.html')) +}# Show individual influential points +save(influential.case.names, file= paste0('influentialCases_Context_trials_D1.', date(),'.rda')) +influential.case.names +unique(influential.case.names) +mean(unlist(influentialpoints), na.rm=T) # Mean cook's distance of outliers. 
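
# Quick illustration (made-up values) of why the log-transformation triggered by
# log_transformation = 'yes' adds 1 before taking the log (implemented in
# FPS_LMM_Assumptions_source.R further below): raw FPS contains meaningful
# 0-responses and log(0) is -Inf, which lm()/lmer() cannot handle, whereas
# log(FPS + 1) keeps those zeros in the model as 0.
demo_fps <- c(0, 2, 15, 80)
log(demo_fps)        # -Inf 0.69 2.71 4.38  -> -Inf breaks model fitting
log(demo_fps + 1)    #  0.00 1.10 2.77 4.39 -> zero responses retained as 0
log1p(demo_fps)      # identical result, numerically more stable near zero
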
+rm(influentialpoints, influential.case.names, "log_transformation") + + +# ITI Mean Day 1 ------------------------------------------------- +# Required variable definitions +{dataset_name = 'ITI_Mean_D1' +Data_list=c(list(Umag.mids$Acq_ITI_Means$data), + miceadds::mids2datlist(Umag.mids$Acq_ITI_Means, X=NULL)) +log_transformation = 'yes' +Model_Formula=formula("FPS~1+Condition") +within_factor = 0 +# Render Assumption Check script to HTML +rmarkdown::render("R/FPS_LMM_Assumptions_source.R", "html_document", + output_file = paste0('Assumptions_ITI_Mean_D1.',date(),'.html')) +} # Note variables "influential.case.names" and "influential points" are only created when LMER() is used (in this case LM() is used, because there is only 1 grouping factor). +rm(influentialpoints, influential.case.names, "log_transformation") + + + +###### TEST ###### + +# Habituation Trials Day 2 ------------------------------------------------- +# Required variable definitions +{dataset_name = 'Hab_trials_D2' + Data_list=c(list(Umag.mids$Test_HAB_Trials$data), + miceadds::mids2datlist(Umag.mids$Test_HAB_Trials, X=NULL)) + log_transformation = 'yes' + Model_Formula=formula("FPS~1+Condition*trial+(1|pp)") + within_factor = 1 + # Render Assumption Check script to HTML + rmarkdown::render("R/FPS_LMM_Assumptions_source.R", "html_document", + output_file = paste0('Assumptions_HAB_trials_D2.',date(),'.html')) +}# Show individual influential points +save(influential.case.names, file= paste0('influentialCases_HAB_trials_D2.',date(),'.rda')) +influential.case.names +unique(influential.case.names) +mean(unlist(influentialpoints), na.rm=T) # Mean cook's distance of outliers. +rm("influentialpoints", "influential.case.names", "log_transformation") + + +# Cue Trials Day 2 ------------------------------------------------- +# Required variable definitions +{dataset_name = 'Cue_trials_D2' +Data_list=c(list(Umag.mids$Test_Cued_Trials$data), + miceadds::mids2datlist(Umag.mids$Test_Cued_Trials, X=NULL)) +log_transformation = 'yes' +Model_Formula=formula("FPS~1+Condition*trial*trialtype+(1|pp)") +within_factor = 1 +# Render Assumption Check script to HTML +rmarkdown::render("R/FPS_LMM_Assumptions_source.R", "html_document", + output_file = paste0('Assumptions_Cue_trials_D2.',date(),'.html')) +}# Show individual influential points +save(influential.case.names, file= paste0('influentialCases_Cue_trials_D2.',date(),'.rda')) +influential.case.names +unique(influential.case.names) +length(influential.case.names) +mean(unlist(influentialpoints), na.rm=T) # Mean cook's distance of outliers. +rm(influentialpoints, influential.case.names, "log_transformation") + + +# Context Trials Day 2 ------------------------------------------------- +# Required variable definitions +{dataset_name = 'Context_trials_D2' +Data_list=c(list(Umag.mids$Test_Context_Trials$data), + miceadds::mids2datlist(Umag.mids$Test_Context_Trials, X=NULL)) +log_transformation = 'yes' +Model_Formula=formula("FPS~1+Condition*trialtype*trial+(1|pp)") +within_factor = 1 +# Render Assumption Check script to HTML +rmarkdown::render("R/FPS_LMM_Assumptions_source.R", "html_document", + output_file = paste0('Assumptions_Context_trials_D2.',date(),'.html')) +}# Show individual influential points +save(influential.case.names, file= paste0('influentialCases_Context_trials_D2.', date(), '.rda')) +influential.case.names +unique(influential.case.names) +length(influential.case.names) +mean(unlist(influentialpoints), na.rm=T) # Mean cook's distance of outliers. 
+rm(influentialpoints, influential.case.names, "log_transformation") + + +# ITI Mean Day 2 ------------------------------------------------- +# Required variable definitions +{dataset_name = 'ITI_Mean_D2' + Data_list=c(list(Umag.mids$Test_ITI_Means$data), + miceadds::mids2datlist(Umag.mids$Test_ITI_Means, X=NULL)) + log_transformation = 'yes' # Log-transformation is needed, to ensure normal distribution of the residuals. + Model_Formula=formula("FPS~1+Condition") + within_factor = 0 + # Render Assumption Check script to HTML + rmarkdown::render("R/FPS_LMM_Assumptions_source.R", "html_document", + output_file = paste0('Assumptions_ITI_Mean_D2.',date(),'.html')) +} # Note variables "influential.case.names" and "influential points" are only created when LMER() is used (in this case LM() is used, because there is only 1 grouping factor). +rm(influentialpoints, influential.case.names, "log_transformation") diff --git a/R/FPS_LMM_Assumptions_source.R b/R/FPS_LMM_Assumptions_source.R new file mode 100644 index 0000000..c1b134a --- /dev/null +++ b/R/FPS_LMM_Assumptions_source.R @@ -0,0 +1,83 @@ +#' --- +#' title: "Check LMM assumption on FPS data from the Fear Generalization Task (FGT) in the SAM study" +#' author: "Milou Sep" +#' date: '`r format(Sys.time(), "%d %B, %Y")`' +#' output: +#' html_document: default +#' --- + +#' Load required packages +library(mice); library(lme4); library(influence.ME); library(miceadds) + +#' Define required variables +dependent_var='FPS'# Fear Potentiated Startle +n_subjects = 117 # Note this is always the same for all outcome measures (or datasets) in the FGT +influential.case.names<-c() +influentialpoints<-list() + +#' Loop assumption checks over all datasets (= list elements) in Data_list +for(i in 1:101){ # Note 101 = 1 dataset with missing values and 100 imputed datasets + +# Print in output which dataset is checked -------------------------------- + if (i == 1){print(paste(dataset_name, dependent_var, "- LMER assumption check on data with missings")) + }else if (i>=1){ print(paste(dataset_name, dependent_var, "- LMER assumption check on imputed dataset", i-1))} # i-1 is included because the data list contains 1 complete case dataset, so (i=2)-1 is imputed dataset 1 + +# Select Dataset for assumption check ------------------------------------- + check.data=Data_list[[i]] + +# Log-transform FPS (if needed) ---------------------------------------------- + # Note log-transformation needs to be performed prior to the formula call. The unstandardized data contains 0-responses, which will lead to infinite values (that LMER() can not handle). + # The solution is to add 1 to all FPS values BEFORE the log-transformation (log()) (ref https://stackoverflow.com/questions/8415778/lm-na-nan-inf-error). + # Note, an alternative solution, to make 0-responses NA, is not suitable here because 0-responses contain meaningful information for FPS analyses. + # The 0-responses stay in the analyses with +1, as log(1)=0. + if (log_transformation == 'yes'){ + check.data$FPS <- check.data$FPS + 1 + check.data$FPS <- log(check.data$FPS) + } + # print(str(check.data$FPS)) # To check if transformation works. 
+ +# Fit Linear(mixed)model -------------------------------------------------- + if (within_factor == 1){ + FullModel<-lmer(Model_Formula, data=check.data, REML=F, na.action = na.omit) + } else if (within_factor == 0){ + FullModel<-lm(Model_Formula, data=check.data) + } + +# Plot's for LMER assumption checks --------------------------------------- + ## Raw data Histogram + datacolum<-check.data[which(colnames(check.data) == dependent_var)] + hist(data.matrix(datacolum), main=paste(dataset_name, "dataset", i, "- Historgram raw data", dependent_var)) + ## Linearity + plot(fitted(FullModel),residuals(FullModel), main=paste(dataset_name, "dataset", i, "- Linearity contrast", dependent_var)); abline(0,0,lty=2,lwd=2); + ## Normality of residuals + ### histogram Residuals + hist(residuals(FullModel), main=paste(dataset_name, "dataset", i, "- Historgram Model Residuals", dependent_var)); + ## QQ-plots + qqnorm(residuals(FullModel), main=paste(dataset_name, "dataset", i, "- QQ plot", dependent_var)); qqline(residuals(FullModel),col = 2) + ## Cook's distance + if (within_factor == 1){ # Display Cook's distance for LMER() model + estex.FullModel <-influence(FullModel,"pp") + cooks_FullModel<-cooks.distance(estex.FullModel) + these_influential_points<-cooks_FullModel[(cooks_FullModel>(4/(n_subjects))),] # Shows measures with cook's distance above cutoff + print(these_influential_points) + # Plot cook's distance + plot(estex.FullModel, which="cook", + cutoff=4/(n_subjects), sort=TRUE, + xlab="Cook's Distance", + ylab="pp", + cex.lab=0.01, # Smaller font size for as labels. + main=paste(dataset_name, "dataset", i, "Cook's Distance", dependent_var)) + # Collect influential cases (as information for analyses) + influentialpoints[[i]] <- these_influential_points # participant name & value of cook's distance + if (i>1){ # collects only names of participants that are influential in imputed datasets(not cooks distance data) + influential.case.names <- c(influential.case.names, names(influentialpoints[[i]])) + } + } else if (within_factor == 0){ # Display Cook's distance for lm() model + plot(FullModel,2) # QQ plot, with case names. + plot(FullModel,4) # Plot for cooks distance for lm() (Note, this shows observations, not participants numbers) + } +} + +#' Remove redundant variables from the global environment. +rm(list=c("check.data", "cooks_FullModel", "Data_list", "datacolum", "dataset_name", "dependent_var", "estex.FullModel", + "FullModel", "i", "Model_Formula", "n_subjects", "these_influential_points", "within_factor"), pos=.GlobalEnv) diff --git a/R/FPS_LMM_Results_to_Plot.R b/R/FPS_LMM_Results_to_Plot.R new file mode 100644 index 0000000..e4e7c8b --- /dev/null +++ b/R/FPS_LMM_Results_to_Plot.R @@ -0,0 +1,205 @@ +#' --- +#' title: "Visualization of Raw FPS response during the Fear Generalization Task (FGT) in the SAM study" +#' author: "Milou Sep" +#' date: august 2019 +#' output: +#' html_document: default +#' --- + +# clear workspace +rm(list=ls()) + +# packages ---------------------------------------------------------------- +# library(readr) # to load .txt data +library(dplyr) +library(tidyr) +library(reshape2) # to change wide to long format +library(Rmisc) # to calculate mean and CI in raw data +library(ggplot2) # to create plots +library(ggpubr) # To arrange plots. 
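
# Illustrative aside (hypothetical values, not part of the original script):
# Rmisc::group.CI() is used below to summarise the raw FPS data; for a formula
# y ~ groups it returns one row per cell with columns y.upper, y.mean and
# y.lower, which is why the plot function further down maps FPS.mean,
# FPS.lower and FPS.upper.
demo_ci <- data.frame(Group = rep(c("A", "B"), each = 10),
                      FPS   = rnorm(20, mean = 50, sd = 10))
group.CI(FPS ~ Group, demo_ci, ci = 0.95)
#   Group FPS.upper FPS.mean FPS.lower   (exact values depend on the random draw)
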
+ +# load and restructure data ----------------------------------------------- +FGT_batch <- read.csv2("data/SAM_FGT.csv", na.strings = c("NaN","5555","8888","9999")) +FGT_batch$subjName <- gsub("SMGH","", FGT_batch$subjName) +Umag <- subset.data.frame(FGT_batch, select = c(grep("Umag", names(FGT_batch)),Condition, subjName)) + +# Change name "Condition" in "Group" +Umag%>%dplyr::rename(Group=Condition) -> Umag + +# 1) Change data to long format +melt(Umag, id.vars = c("subjName", "Group"), measure.vars = grep("Umag", names(Umag)), + variable.name = 'variable', value.name='FPS') ->Umag_long +str(Umag_long) + +# 2) Split variable names +separate(Umag_long, col =variable, into = c("phase", "probe", NA, "trial"), sep = "_", convert = T) -> Umag_long +str(Umag_long) +unique(Umag_long$phase) + +# 3) Split trialtype code (letter) and trial number (digit) +Umag_long %>% mutate(trialtype = gsub("[[:digit:]]","",trial), # Note = replace letter by "" + trial = as.numeric(gsub("[^[:digit:]]","",trial))) -> Umag_long # Note = replace not a letter by "" + +# 4) Add HAB and ITI as trialtypes +Umag_long %>% mutate( + trialtype = ifelse(probe == "ITI", "ITI", trialtype), + trialtype = ifelse(probe == "HAB", "HAB", trialtype), + trialtype = ifelse(probe == "HABctx", "HABctx", trialtype), +) -> Umag_long # %>% filter(probe == "Cue") # to check +# to check +Umag_long[which(Umag_long$probe == 'ITI'),] # Check iti +Umag_long[ which( Umag_long$trialtype == "HABctx"),] # for HAB! + +# 5) Change to factor +cols <- c("subjName","Group","phase", "probe", "trialtype") +#Use lapply() to coerce and replace the chosen columns: +Umag_long[cols] <- lapply(Umag_long[cols], factor) +# checking +unique(Umag_long$probe) #-> Cue, Ctx, ITI, Hab, Habctx +unique(Umag_long$trialtype) + +# Add labels to Group and trialtype for readability in plots (and change order of appearance in plots) +Umag_long$probe <- factor(Umag_long$probe, levels=c("HAB", "HABctx", "Cue", "ITI", "Ctx"), + labels = c("NAh", "pre-acq ctx", "CUE", "ITI", "CTX")) +attributes(Umag_long$probe) + +attributes(Umag_long$Group) +Umag_long$Group <- factor(Umag_long$Group, levels=c(3,2,1), + labels =c("No-Stress", "Immediate-Stress", "Delayed-Stress")) + +attributes(Umag_long$trialtype) +Umag_long$trialtype <- factor(Umag_long $trialtype, levels=c( "T" ,"S" , "N" , "ITI", "HAB", "HABctx" ), + labels = c("CTX+", "CTX-", "G-CTX", "ITI", "NAh", "pre-acq context")) +# 6) Change to numeric +Umag_long$FPS<-as.numeric(Umag_long$FPS) + +# Calculate mean and CI --------------------------------------------------- +group.CI(FPS~Group+phase+probe+trial+trialtype, Umag_long, ci = 0.95) -> means_long +str(means_long) + +# Function to plot raw data ----------------------------------------------- +Plot_Raw_FGT <- function(data){ + plot<- + ggplot(data = data, aes(x=trial, y= FPS.mean, group = factor(trialtype), colour= trialtype)) + + # Layout: + facet_grid(.~ Group ) + # Groups next to each other + theme_classic() + + scale_x_continuous(breaks = unique (data$trial)) + + xlab("Trialnumber") + + ylab('FPS (mean +/-95%CI)') + # ax labels & title + labs(colour = "Trialtype")+ + # adjust font size legend and axis + theme(legend.title=element_text(size=12), legend.text=element_text(size=9), + axis.text = element_text(size=9), axis.title= element_text(size=10))+ + # add color to plot: https://stackoverflow.com/questions/17180115/manually-setting-group-colors-for-ggplot2 + scale_color_manual( # Colours: http://sape.inf.usi.ch/quick-reference/ggplot2/colour + values= c('NAh' = 'black', + 
'CTX+' = 'red2', + 'CTX-' = 'mediumblue', + 'G-CTX' = 'forestgreen', + 'ITI' = 'gray50' )) + + # add Data: + geom_line(aes(colour=trialtype), stat="summary" , fun.y="mean", na.rm=T, size=1) + # remove.na=TRUE is used to remove warnings about missing values (which are present in raw data) + geom_point(stat="summary",fun.y="mean", na.rm=T, size=2) + # Add datapoints + # .. and errorbars (info: https://stackoverflow.com/questions/48800212/set-error-bars-to-standard-deviation-on-a-ggplot2-bar-graph) + geom_errorbar(aes(ymin=FPS.lower, ymax=FPS.upper), position=position_dodge(.5), width=0.5) + + return(plot)} + + +# Call function to plot data per phase ------------------------------------ + +# Acquisition: Noise Alone (Figure 4A) +means_long %>% + filter(phase =='sA', + probe == 'NAh') %>% + Plot_Raw_FGT() %>% + + ggtitle(("Pre-aquisition noise alone trials")) ->acq_NAh + +# CUE (Figure 4B) +means_long %>% + mutate(trial = ifelse(probe == "ITI", (trial*2), trial)) %>% # Change the trial numbers of the ITI trials (i.e. *2) for visualization, so that it is clear that ITI probes were delivered throughout acquisition phase (not just the first part) + filter(phase =='sA', + probe == 'CUE'| + probe == 'ITI') %>% + Plot_Raw_FGT() %>% + + ggtitle(("Contextualization of cued fear")) -> acq_cue + +# CTX (Figure 4C) +means_long %>% + filter(phase =='sA', + probe == 'CTX'| + probe == 'ITI') %>% + Plot_Raw_FGT() %>% + + ggtitle(("Contextual fear expression (acquisition)")) -> acq_ctx + + +# arrange the plots of the acquisition phase in Figure 4 +FGT_acq.plots <- + ggarrange( + acq_NAh, + acq_cue, + acq_ctx, + labels = c("A", "B", "C"), + ncol = 1, nrow = 3, + font.label=list(size=12, face="bold"), + align = "v", + legend="right", + common.legend = F) + +annotate_figure(FGT_acq.plots, + fig.lab=c("Figure 4"), + fig.lab.pos = "top.right", + fig.lab.face = "bold", + fig.lab.size = 12) + +ggsave(paste0("results/Figure4.Acq.Plots.colour.",date(),".eps"), device="eps", dpi = 800, height = 7, width = 7, limitsize = T ) + + + +# TEST: Noise Alone, Cue, CTX --------------------------------------------- + +# NAh; Figure 5A +means_long %>% + filter(phase =='sG', + probe == 'NAh') %>% + Plot_Raw_FGT() %>% + + ggtitle(("Pre-memory test noise alone trials")) ->test_NAh + +# CUE; Figure 5B +means_long %>% + mutate(trial = ifelse(probe == "ITI", (trial*2), trial)) %>% # Change the trial numbers of the ITI trials (i.e. *2) for visualization, so that it is clear that ITI probes were delivered throughout acquisition phase (not just the first part) + filter(phase =='sG', + probe == 'CUE'| + probe == 'ITI') %>% + Plot_Raw_FGT() %>% + + ggtitle(("Context-dependency of cued fear memory")) -> test_cue + +# CTX; Figure 5C +means_long %>% + filter(phase =='sG', + probe == 'CTX'| + probe == 'ITI') %>% + Plot_Raw_FGT() %>% + + ggtitle(("Contextual fear expression (memory)")) -> test_ctx + +# arrange the plots of the test phase in Figure 5. 
+FGT_test.plots <- + ggarrange( + test_NAh, + test_cue, + test_ctx, + labels = c("A", "B", "C"), + ncol = 1, nrow = 3, + font.label=list(size=12, face="bold"), + align = "v", + legend="right", + common.legend = F) + +annotate_figure(FGT_test.plots, + fig.lab=c("Figure 5"), + fig.lab.pos = "top.right", + fig.lab.face = "bold", + fig.lab.size = 12) + +ggsave(paste0("results/Figure5.Test.Plots.colour.",date(),".eps"), device="eps", dpi = 800, height = 7, width = 7, limitsize = T ) diff --git a/R/FPS_LMM_Results_to_Word.R b/R/FPS_LMM_Results_to_Word.R new file mode 100644 index 0000000..75edda8 --- /dev/null +++ b/R/FPS_LMM_Results_to_Word.R @@ -0,0 +1,69 @@ +# Supporting function to export the results from pool.compare() in the analysis of the data from the Fear Generalization Task (FGT) in the SAM study to word tables +# written by Milou Sep +# version 13.9.18 + +# Load required packages -------------------------------------------------- +library(flextable) # To make publication ready tables for word | info: https://davidgohel.github.io/flextable/articles/overview.html +library(officer) # To export chi tables to word | info: https://davidgohel.github.io/flextable/articles/layout.html + +# Function for Chi2 Tables ------------------------------------------------ +Export.FGT.Table <- function(FGT_Results_Table, TableName, file.export){ + # Create Flextable + FGT_Table <- flextable(FGT_Results_Table) + # Add Layout settings to flextable(). + # Make significant value's Bold [Info: https://cran.r-project.org/web/packages/flextable/vignettes/format.html#bold] + FGT_Table <- bold(FGT_Table, i= ~pvalue < 0.05 , j="pvalue") # Note is j= is included, only value not total row bold + # set number of digits # https://davidgohel.github.io/flextable/articles/format.html#set_formatter-function + FGT_Table <- display(FGT_Table, col_key="df1", pattern = "{{df1}}", formatters = list(df1~sprintf("%.00f",df1))) # no digits for df (note if 1 digit would be required, use %.01f etc.). + FGT_Table <- theme_vanilla(FGT_Table) # remove thick lines + # FGT_Table <- set_header_labels(FGT_Table, LogLikelihood="Log Likelihood" ,deltadf= "delta df", pvalue="p-value") # change names + FGT_Table <- autofit(FGT_Table) # adapt table dimensions to content + + if (file.export == T){ + doc <- read_docx() + doc <- body_add_flextable(doc, value = FGT_Table, align="center") + print(doc, target = paste0("results/",TableName, date(),".docx")) + } + + return(FGT_Table) +} + + +Export.FGT.FollowupTrialtype <- function(FGT_FollowupResults, TableName, file.export){ + names<-rownames(FGT_FollowupResults) + # Create Flextable + FGT_Table <- flextable(data=cbind(names, FGT_FollowupResults)) + FGT_Table<-set_header_labels(FGT_Table, names="Trialtype", Qbar = "Pooled Means", Total.var = "variance", + lower.95="lower 95CI", upper.95 = "upper 95CI") # Note Qbar = pooled means (overall point estimate) + # Add Layout settings to flextable(). 
+ FGT_Table <- theme_vanilla(FGT_Table) + FGT_Table <- autofit(FGT_Table) + + if (file.export == T){ + doc <- read_docx() + doc <- body_add_flextable(doc, value = FGT_Table, align="center") + print(doc, target = paste0("results/", TableName, date(),".docx")) + } + + return(FGT_Table) +} + + +Export.FGT.FollowupCondition <- function(FGT_FollowupResults, TableName, file.export){ + names<-rownames(FGT_FollowupResults) + # Create Flextable + FGT_Table <- flextable(data=cbind(names, FGT_FollowupResults)) + FGT_Table<-set_header_labels(FGT_Table, names="Condition", Qbar = "Pooled Means", Total.var = "variance", + lower.95="lower 95CI", upper.95 = "upper 95CI") + # Add Layout settings to flextable (). + FGT_Table <- theme_vanilla(FGT_Table) + FGT_Table <- autofit(FGT_Table) + + if (file.export == T){ + doc <- read_docx() + doc <- body_add_flextable(doc, value = FGT_Table, align="center") + print(doc, target = paste0("results/", TableName, date(),".docx")) + } + + return(FGT_Table) +} diff --git a/R/FPS_LMM_Trialtype.Trial.Condition.r b/R/FPS_LMM_Trialtype.Trial.Condition.r new file mode 100644 index 0000000..20297d0 --- /dev/null +++ b/R/FPS_LMM_Trialtype.Trial.Condition.r @@ -0,0 +1,82 @@ +# Supporting function for the analysis of the data from the Fear Generalization Task (FGT) in the SAM study +# Written by Milou Sep + +FGT_LMER_Trialtype.Trial.Condition <- function(dataset){ + + # Note, the Bobyaqa optimizer method was used for better model convergence (Note, this optimizer was also used for MCT analyses (https://github.com/mscsep/SAM_MCT). + # Info on LMER convergence problems: https://rstudio-pubs-static.s3.amazonaws.com/33653_57fc7b8e5d484c909b615d8633c01d51.html + # Info on optimizer checks http://svmiller.com/blog/2018/06/mixed-effects-models-optimizer-checks/ + # ?allFit() # Note: allFit() function doesn't work with Mids objects. 
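+  # Model-comparison strategy: each (nested) lmer model below is fitted on every imputed dataset via
+  # with(<mids>, lmer(...)), and pool.compare(larger, smaller, method='wald') pools the Wald test for the
+  # dropped term(s) across the imputations. Terms are tested top-down: the 3-way interaction first, then
+  # the 2-way interactions, then the main effects.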
+ + FullModel <-with(expr=lmer(FPS~1+Condition*trialtype*trial+(1|pp),control=lmerControl(optimizer="bobyqa") ),data = dataset, REML=F) + + Full_NO_3way<-with(expr=lmer(FPS~1+Condition+trialtype+trial+ # Main effects + Condition:trialtype+Condition:trial+trialtype:trial+ # 2 way interaction effects + (1|pp) ,control=lmerControl(optimizer="bobyqa")),data = dataset,REML=F) # Random effects term + + ### 2 Way interactions [compared to Full Model without 3Way] ### + # Define Models + Full_NO_Condition.trial<-with(expr=lmer(FPS~1+Condition+trialtype+trial+ + Condition:trialtype+trialtype:trial+ + (1|pp) ,control=lmerControl(optimizer="bobyqa") ),data = dataset, REML=F) + + Full_NO_trial.trialtype<-with(expr=lmer(FPS~1+Condition+trialtype+trial+ + Condition:trialtype+Condition:trial+ + (1|pp) ,control=lmerControl(optimizer="bobyqa") ),data = dataset, REML=F) + + Full_NO_Condition.trialtype<-with(expr=lmer(FPS~1+Condition+trialtype+trial+ + Condition:trial+trialtype:trial+ + (1|pp),control=lmerControl(optimizer="bobyqa") ),data = dataset, REML=F) + + # 3 way interaction + Condition.trial.trialtype <- pool.compare(FullModel,Full_NO_3way, method='wald') # To test if there is a 3way interaction, and the Log-Likelihood (and df) of the full model # pool.compare(main, base, method='wald') + + # 2 way interactions + Condition.trial <- pool.compare(Full_NO_3way,Full_NO_Condition.trial, method='wald') + trial.trialtype <- pool.compare(Full_NO_3way,Full_NO_trial.trialtype, method='wald') + Condition.trialtype <- pool.compare(FullModel, Full_NO_Condition.trialtype, method='wald') + + ### Main effects ### + # Define Models + Main_NO_2ways<-with(expr=lmer(FPS~1+Condition+trialtype+trial+(1|pp) ,control=lmerControl(optimizer="bobyqa")),data = dataset, REML=F) + Main_Condition<-with(expr=lmer(FPS~1+trial+trialtype+(1|pp) ,control=lmerControl(optimizer="bobyqa") ),data = dataset, REML=F) + Main_trial<-with(expr=lmer(FPS~1+Condition+trialtype+(1|pp) ,control=lmerControl(optimizer="bobyqa") ),data = dataset, REML=F) + Main_trialtype<-with(expr=lmer(FPS~1+Condition+trial+(1|pp) ,control=lmerControl(optimizer="bobyqa") ),data = dataset, REML=F) + + # Compare Models + Condition <- pool.compare(Main_NO_2ways,Main_Condition, method='wald') + trial <- pool.compare(Main_NO_2ways,Main_trial, method='wald') + trialtype <- pool.compare(Main_NO_2ways,Main_trialtype, method='wald') + + + # Table of model comparisons (for publication) -------------------------------- + # Outcomes of pool.compare() (RG: export " Dm rm df1 df2 p " to table) + #?pool.compare + + # Make vectors for data in each row + threeway.C.T.T <- c("Condition x Trial x Trialtype", Condition.trial.trialtype$Dm, Condition.trial.trialtype$rm, Condition.trial.trialtype$df1, Condition.trial.trialtype$df2, Condition.trial.trialtype$pvalue) + + twoway.C.T <- c("Condition x Trial", Condition.trial$Dm, Condition.trial$rm, Condition.trial$df1, Condition.trial$df2, Condition.trial$pvalue) + twoway.C.Tt <- c("Condition x Trialtype", Condition.trialtype$Dm, Condition.trialtype$rm, Condition.trialtype$df1, Condition.trialtype$df2, Condition.trialtype$pvalue) + twoway.T.Tt <- c("Trial x Trialtype", trial.trialtype$Dm, trial.trialtype$rm, trial.trialtype$df1, trial.trialtype$df2, trial.trialtype$pvalue) + + Main.effect.Condition <- c("Condition", Condition$Dm, Condition$rm, Condition$df1, Condition$df2, Condition$pvalue) + Main.effect.trialtype <- c("Trialtype", trialtype$Dm, trialtype$rm, trialtype$df1, trialtype$df2, trialtype$pvalue) + Main.effect.trial <- c("Trial", 
trial$Dm, trial$rm, trial$df1, trial$df2, trial$pvalue) + + WORD.table.FGT <- data.frame(rbind(threeway.C.T.T , + twoway.C.T, twoway.C.Tt , twoway.T.Tt, + Main.effect.Condition, Main.effect.trialtype, Main.effect.trial)) + colnames(WORD.table.FGT) <- c("Model", "Dm", "rm", "df1", "df2", "pvalue") + + # Data cells need to be changed to numeric (not factor), otherwise problems with flextable(). + # str(WORD.table.FGT) + WORD.table.FGT[c(2,3,4,5,6)] <- sapply(WORD.table.FGT[c(2,3,4,5,6)], as.character) # First convert to character .. + WORD.table.FGT[c(2,3,4,5,6)] <- sapply(WORD.table.FGT[c(2,3,4,5,6)], as.numeric) # .. then to numeric + # sapply(WORD.table.FGT, class) # check data class of table. + + # Return function output -------------------------------------------------- + + return(WORD.table.FGT) + +} \ No newline at end of file diff --git a/R/FPS_LMM_pool_EMM.r b/R/FPS_LMM_pool_EMM.r new file mode 100644 index 0000000..beff5b7 --- /dev/null +++ b/R/FPS_LMM_pool_EMM.r @@ -0,0 +1,148 @@ +# Pool LMER Estimated Marginal Means (EMM) of imputed data from the Fear Generalization Task (FGT) in the SAM study +# written by Rosalie Gorter, adapted by Milou Sep (function created) +# Last version 16.1.19 + +library(emmeans) # to calculate EMMs +library(ggplot2) # to visualize EMMs + + +# EMM Trialtype ----------------------------------------------------------- + +pool.means.trialtype <- function(model, m, phase){ + # model is the largest model, m is number of imputations, phase is "ACQ" or "TEST" + + results<-list() + for(i in 1:m){#place emmeans() results for every imputation in list + results[i]<-list(emmeans(model[[4]][[i]], + pairwise ~ trialtype, #set contrasts + adjust="tukey",#p-value adjustment for multiple comparisons family wise (note: Benjamini-Hochberg correction is not appropriate here because p-values are not independent http://www-stat.wharton.upenn.edu/~steele/Courses/956/Resource/MultipleComparision/Writght92.pdf) + lmer.df = "kenward-roger")) + } + + # # To check: + # model[[4]][[1]] + # results[[1]] + # results[[1]]$emmeans + # data.frame(results[[1]][1]$emmeans)$emmean + # data.frame(results[[i]][1]$emmeans)$SE + + # pool parameters according to rubins rules + # Rubin DB. Inference and missing data. Biometrika. 1976;63:581-92. + # Marshall A, Altman DG, Holder RL, Royston P. Combining estimates of interest in prognostic modelling studies after multiple imputation: current practice and guidelines. BMC Med Res Methodol. 2009;9:57. doi:10.1186/1471-2288-9-57. 
+ # steps: calculate pooled parameter; calculate pooled standard error (pse); used pse to calculate 95% CI + + # Combine all imputed values into pooled mean and variance and 95% CI + + if (phase == "ACQ"){ + Qhat<-matrix(NA,ncol=m,nrow=2) #estimated means for every imputed dataset + rownames(Qhat)<-c("mean.Threat","mean.Safe") + colnames(Qhat)<-c(paste("imp.",1:m,sep="")) + + U_i<-matrix(NA,ncol=m,nrow=2) #estimated variance per imputed dataset associated with mean + rownames(U_i)<-c("SE.Threat","SE.Safe") + colnames(U_i)<-c(paste("imp.",1:m,sep="")) + + } else if (phase == "TEST"){ + Qhat<-matrix(NA,ncol=m,nrow=3) #estimated means for every imputed dataset + rownames(Qhat)<-c("mean.Threat","mean.Safe", "mean.New") + colnames(Qhat)<-c(paste("imp.",1:m,sep="")) + + U_i<-matrix(NA,ncol=m,nrow=3) #estimated variance per imputed dataset associated with mean + rownames(U_i)<-c("SE.Threat","SE.Safe", "SE.New") + colnames(U_i)<-c(paste("imp.",1:m,sep="")) + } + + for(i in 1:m){ #place estimates per imputation in matrix + Qhat[,i]<-data.frame(results[[i]][1]$emmeans)$emmean + U_i[,i]<-data.frame(results[[i]][1]$emmeans)$SE + } + + #pool means and se's + Ubar<-rowMeans(U_i) #within imputation variance + B<-(1/(m-1)) * rowSums((Qhat-rowMeans(Qhat))^2)#between imputation variance + Total.var <-Ubar+B #Total variance + Qbar<-rowMeans(Qhat) #pooled means (overall point estimate) + r_m<-(1+m^-1)*B/Ubar #relative increase in variance + nu<-(m-1)*(1+r_m)^2 #degrees of freedom t reference distribution + results[[1]] + t.bound<-abs(qt(0.05/2, nu))#calculate t values for boundaries 95% CI based on adjusted df (nu) + + lower.95<-Qbar-t.bound*Total.var^(1/2) #rubin1987 p77 + upper.95<-Qbar+t.bound*Total.var^(1/2) + + pooled.results.means<-data.frame(cbind(Qbar,Total.var,lower.95,upper.95)) + return(pooled.results.means) #pooled estimates per Condition +} + + +# EMM Condition ----------------------------------------------------------- + +pool.means.condition <- function(model, m){ + # model is the largest model, m is number of imputations + results<-list() + for(i in 1:m){#place emmeans() results for every imputation in list + results[i]<-list(emmeans(model[[4]][[i]], + pairwise ~ Condition, #set contrasts + adjust="tukey",#p-value adjustment for multiple comparisons family wise (note: Benjamini-Hochberg correction is not appropriate here because p-values are not independent http://www-stat.wharton.upenn.edu/~steele/Courses/956/Resource/MultipleComparision/Writght92.pdf) + lmer.df = "kenward-roger")) + } + + # pool parameters according to rubins rules + # Rubin DB. Inference and missing data. Biometrika. 1976;63:581-92. + # Marshall A, Altman DG, Holder RL, Royston P. Combining estimates of interest in prognostic modelling studies after multiple imputation: current practice and guidelines. BMC Med Res Methodol. 2009;9:57. doi:10.1186/1471-2288-9-57. 
+ # steps: calculate pooled parameter; calculate pooled standard error; used pse to calculate 95% CI + + # Combine all imputed values into pooled mean and variance and 95% CI + + Qhat<-matrix(NA,ncol=m,nrow=3) #estimated means for every imputed dataset + rownames(Qhat)<-c("mean.Delayed.Stress","mean.Direct.Stress", "mean.No.Stress") + colnames(Qhat)<-c(paste("imp.",1:m,sep="")) + + U_i<-matrix(NA,ncol=m,nrow=3) #estimated variance per imputed dataset associated with mean + rownames(U_i)<-c("SE.Delayed.Stress","SE.Direct.Stress", "SE.No.Stress") + colnames(U_i)<-c(paste("imp.",1:m,sep="")) + + for(i in 1:m){ #place estimates per imputation in matrix + Qhat[,i]<-data.frame(results[[i]][1]$emmeans)$emmean + U_i[,i]<-data.frame(results[[i]][1]$emmeans)$SE + } + + #pool means and se's + Ubar<-rowMeans(U_i) #within imputation variance + B<-(1/(m-1)) * rowSums((Qhat-rowMeans(Qhat))^2)#between imputation variance + Total.var <-Ubar+B #Total variance + Qbar<-rowMeans(Qhat) #pooled means (overall point estimate) + r_m<-(1+m^-1)*B/Ubar #relative increase in variance + nu<-(m-1)*(1+r_m)^2 #degrees of freedom t reference distribution + results[[1]] + t.bound<-abs(qt(0.05/2, nu))#calculate t values for bounderies 95% CI based on adjusted df (nu) + + lower.95<-Qbar-t.bound*Total.var^(1/2) #rubin1987 p77 + upper.95<-Qbar+t.bound*Total.var^(1/2) + + pooled.results.means<-data.frame(cbind(Qbar,Total.var,lower.95,upper.95)) + return(pooled.results.means) #pooled estimates per Condition +} + + +# Function to plot mean epochs follow-up analyses ------------------------- +Trialtype.PlotD2 <- function(dataset_to_plot, x, y){ + plot2<- + ggplot(data=dataset_to_plot, aes(x, y)) + + theme_classic() + + ylab('FPS \n (mean+/-95%CI)') + xlab("") + # axis labels & title + scale_x_discrete(limits=c("mean.Threat","mean.Safe","mean.New"), # Change order x-axis + labels=c("Threat ", "Safe", "New ")) + + scale_fill_manual( # Colours: http://sape.inf.usi.ch/quick-reference/ggplot2/colour + values= c( Safe = "navyblue", Threat = "red3", New = "black")) + + # labels =c(Delayed = "delayed-stress", + # Direct = "direct-stress", + # Control = "no-stress")) + + # adjust fontsize legend + theme(legend.title=element_text(size=12), legend.text=element_text(size=9))+ + # Add mean & CI's + geom_bar(stat="identity", position = position_dodge(.9), alpha=.8) + + geom_errorbar(aes(ymin=lower.95, ymax=upper.95), width=.2, position=position_dodge(.9)) + + return(plot2) +} \ No newline at end of file diff --git a/R/FPS_LMM_trial.Condition.R b/R/FPS_LMM_trial.Condition.R new file mode 100644 index 0000000..9405569 --- /dev/null +++ b/R/FPS_LMM_trial.Condition.R @@ -0,0 +1,40 @@ +# Supporting function for the analysis of the data from the Fear Generalization Task (FGT) in the SAM study +# Written by Milou Sep + +FGT_LMER_trial.Condition <- function(dataset){ + + { + ## 2way interactions ## + # Define Models + FullModel <-with(expr=lmer(FPS~1+Condition*trial+(1|pp),control=lmerControl(optimizer="bobyqa")),data = dataset, REML=F) + Main_NO_2way<-with(expr=lmer(FPS~1+Condition+trial+(1|pp),control=lmerControl(optimizer="bobyqa")),data = dataset, REML=F) + # Compare Models + Condition.trial <- pool.compare(FullModel, Main_NO_2way, method='wald') + + ## Main effects ## + # Define Models + Main_Condition<-with(expr=lmer(FPS~1+trial+(1|pp),control=lmerControl(optimizer="bobyqa")),data = dataset, REML=F) + Main_trial<-with(expr=lmer(FPS~1+Condition+(1|pp),control=lmerControl(optimizer="bobyqa")),data = dataset, REML=F) + # Compare Models + Condition <- 
pool.compare(Main_NO_2way,Main_Condition, method='wald') + trial <- pool.compare(Main_NO_2way,Main_trial, method='wald') + } + + # Table of model comparisons (Publication) -------------------------------- + twoway.C.T <- c("Condition x Trial", Condition.trial$Dm, Condition.trial$rm, Condition.trial$df1, Condition.trial$df2, Condition.trial$pvalue) + Main.effect.Condition <- c("Condition", Condition$Dm, Condition$rm, Condition$df1, Condition$df2, Condition$pvalue) + Main.effect.trial <- c("Trial", trial$Dm, trial$rm, trial$df1, trial$df2, trial$pvalue) + + WORD.table.FGT <- data.frame(rbind(twoway.C.T , + Main.effect.Condition, Main.effect.trial)) + colnames(WORD.table.FGT) <- c("Model", "Dm", "rm", "df1", "df2", "pvalue") + + # Data cells need to be changed to numeric (not factor), otherwise problems with flextable(). + WORD.table.FGT[c(2,3,4,5,6)] <- sapply(WORD.table.FGT[c(2,3,4,5,6)], as.character) # First convert to character .. + WORD.table.FGT[c(2,3,4,5,6)] <- sapply(WORD.table.FGT[c(2,3,4,5,6)], as.numeric) # .. then to numeric + + # Return function output -------------------------------------------------- + + return(WORD.table.FGT) + +} diff --git a/R/FPS_LMM_trialtype.Condition.r b/R/FPS_LMM_trialtype.Condition.r new file mode 100644 index 0000000..d2586de --- /dev/null +++ b/R/FPS_LMM_trialtype.Condition.r @@ -0,0 +1,40 @@ +# Supporting function for the analysis of the data from the Fear Generalization Task (FGT) in the SAM study +# Written by Milou Sep + +FGT_LMER_trialtype.Condition <- function(dataset){ + + { + ## 2way interactions ## + # Define Models + FullModel <-with(expr=lmer(FPS~1+Condition*trialtype+(1|pp), control=lmerControl(optimizer="bobyqa")),data = dataset, REML=F) + Main_NO_2way<-with(expr=lmer(FPS~1+Condition+trialtype+(1|pp),control=lmerControl(optimizer="bobyqa")),data = dataset, REML=F) + # Compare Models + Condition.trialtype <- pool.compare(FullModel, Main_NO_2way, method='wald') + + ## Main effects ## + # Define Models + Main_Condition<-with(expr=lmer(FPS~1+trialtype+(1|pp),control=lmerControl(optimizer="bobyqa")),data = dataset, REML=F) + Main_trialtype<-with(expr=lmer(FPS~1+Condition+(1|pp),control=lmerControl(optimizer="bobyqa")),data = dataset, REML=F) + # Compare Models + Condition <- pool.compare(Main_NO_2way,Main_Condition, method='wald') + trialtype <- pool.compare(Main_NO_2way,Main_trialtype, method='wald') + } + + # Table of model comparisons (Publication) -------------------------------- + twoway.C.T <- c("Condition x Trialtype", Condition.trialtype$Dm, Condition.trialtype$rm, Condition.trialtype$df1, Condition.trialtype$df2, Condition.trialtype$pvalue) + Main.effect.Condition <- c("Condition", Condition$Dm, Condition$rm, Condition$df1, Condition$df2, Condition$pvalue) + Main.effect.trialtype <- c("Trialtype", trialtype$Dm, trialtype$rm, trialtype$df1, trialtype$df2, trialtype$pvalue) + + WORD.table.FGT <- data.frame(rbind(twoway.C.T , + Main.effect.Condition, Main.effect.trialtype)) + colnames(WORD.table.FGT) <- c("Model", "Dm", "rm", "df1", "df2", "pvalue") + + # Data cells need to be changed to numeric (not factor), otherwise problems with flextable(). + WORD.table.FGT[c(2,3,4,5,6)] <- sapply(WORD.table.FGT[c(2,3,4,5,6)], as.character) # First convert to character .. + WORD.table.FGT[c(2,3,4,5,6)] <- sapply(WORD.table.FGT[c(2,3,4,5,6)], as.numeric) # .. 
then to numeric + + # Return function output -------------------------------------------------- + + return(WORD.table.FGT) + +} diff --git a/R/FPS_LM_Condition.r b/R/FPS_LM_Condition.r new file mode 100644 index 0000000..8ae6aa2 --- /dev/null +++ b/R/FPS_LM_Condition.r @@ -0,0 +1,26 @@ +# Supporting function for the analysis of the data from the Fear Generalization Task (FGT) in the SAM study +# Written by Milou Sep + +FGT_LM.Condition <- function(dataset){ + + # Compare Conditions + Base<-with(expr=lm(FPS~1),data = dataset) + Main_Condition<-with(expr=lm(FPS~1+Condition),data = dataset) + # Compare models + Condition <- pool.compare(Main_Condition, Base, method='wald') + + # Make vectors for data in each row + Main.effect.Condition <- c("Condition", Condition$Dm, Condition$rm, Condition$df1, Condition$df2, Condition$pvalue) + + WORD.table.FGT <- data.frame(rbind( Main.effect.Condition)) + colnames(WORD.table.FGT) <- c("Model", "Dm", "rm", "df1", "df2", "pvalue") + + # Data cells need to be changed to numeric (not factor), otherwise problems with flextable(). + WORD.table.FGT[c(2,3,4,5,6)] <- sapply(WORD.table.FGT[c(2,3,4,5,6)], as.character) # First convert to character .. + WORD.table.FGT[c(2,3,4,5,6)] <- sapply(WORD.table.FGT[c(2,3,4,5,6)], as.numeric) # .. then to numeric + + # Return function output -------------------------------------------------- + + return(WORD.table.FGT) + +} diff --git a/R/FPS_imputation.R b/R/FPS_imputation.R new file mode 100644 index 0000000..e383cdf --- /dev/null +++ b/R/FPS_imputation.R @@ -0,0 +1,279 @@ +# Imputation Fear potentiated startle (EMG eyeblink) data from the Fear Generalization Task (FGT) in the SAM study +# Written by Rosalie Gorter & Milou Sep. + +# NOTE: The function "Impute_FGT_EMG_SAM" is used twice in this script: +# 1) to impute individual trials (set sorttype to 'trials'). +# - Note: trials will be imputed if more than 1/3 of the trials in a category is present (in other words if <2/3 missing), +# if less than 1/3 is present (in other words if >2/3 is missing; `missing code 4`) all the trials (for that category) will be set to missing +# 2) to create imputed means (set sorttype to 'mean') +# - Note: means will be based on imputed trials if more than 2/3 of trials is present (in other words if <1/3 missing; `missing code 1`), +# or imputed directly if less than 2/3 of the trials is present (or in other words if >1/3 missing; `missing code 2` ). + +#install.packages("mice") +library("mice") +# install.packages("readr") +library(readr) + +# Read Data --------------------------------------------------------------- +# (Note: Only valid digital Biopac cues analyzed) +FGT_batch <- read_delim("data/SAM_FGT.csv", ";",locale=locale(decimal_mark = ","), escape_double = FALSE, trim_ws = TRUE, na = c("NaN","5555","8888","9999")) +# Note Missing codes (assigned via Matlab preprocessing code): +# 5555 % Digital registration error, message on screen & MissingValue 5555. +# 8888 % Excessive baseline activity. 
(More than 2 SD deviation) +# 9999 % Latency onset not valid (Note, this only affects onset scores, so not present in magnitude variables) + +# Prepare Data ------------------------------------------------------------ +# remove "SMGH" from subject names to use numbers on x-as (& in subset) +FGT_batch$subjName <- gsub("SMGH","", FGT_batch$subjName) +# Make Condition Factor +FGT_batch$Condition <- as.factor(FGT_batch$Condition) +# Subset all Unstandardized startle magnitude variables (Umag) +Umag <- subset.data.frame(FGT_batch, select = c(grep("Umag", names(FGT_batch)),Condition)) +# Note! Imputation now only done on Umag data (which is better than standardized data for LMM analyses). The dataset also contains Standardized startle magnitude variables (Smag), Peak latencies () and Onset Latencies are also available in FGT_batch. +# This script could also be used to impute Standardized FPS responses. +# Smag <- subset.data.frame(FGT_batch, select = c(grep("Smag", names(FGT_batch)),Condition)) + +# Imputation Settings ----------------------------------------------------- +M<-100 +MAXIT<-50 + +# Call imputation script -------------------------------------------------- + +# Impute separate trials (used for Habituation-trials, Cue-trials, Context-trials) +out_Umag_trials<-Impute_FGT_EMG_SAM(data=Umag, M=M, MAXIT=MAXIT, sorttype='trials') # call function below +save(out_Umag_trials, file = 'processed_data/OUPUT_IMPUTATIE_FGT_out_M50_MAXIT100_Umag_Trials.rda') # save Imputed datasets + +# Impute means (used for inter-trial intervals) +out_Umag_all<-Impute_FGT_EMG_SAM(data=Umag, M=M, MAXIT=MAXIT, sorttype='mean') # call function below +save(out_Umag_all, file = 'processed_data/OUPUT_IMPUTATIE_FGT_out_M50_MAXIT100_Umag_AllMeans.rda') # save Imputed datasets + +# Export original data ------------------------------------------------------ +# Add subjectname to dataset (was removed for imputations, but required for further data analyses) +Umag$subject <- as.numeric(FGT_batch$subjName) +# Save input data to imputation file +save(Umag, file = 'processed_data/INPUT_IMPUTATIE_FGT_Umag.rda') # Raw dataset (needed to select `complete cases` for midsobject) + + +# Imputation Function Definition ------------------------------------------ +Impute_FGT_EMG_SAM<-function(data,M,MAXIT,sorttype,...){ + + # DEFINE VARIABLE PATTERNS & NAMES ---------------------------------------- + FGT_Variabels <- c('sA_Cue_.*_T', 'sA_Cue_.*_S', 'sA_Ctx_.*_T','sA_Ctx_.*_S', 'sA_ITI_', # Variables 1t/m5 + 'sA_HAB_', # Variable 6 + 'sA_HABctx_.*_T', 'sA_HABctx_.*_S','sA_HABctx_.*_N', # Variables 7,8,9 + 'sG_Cue_.*_T', 'sG_Cue_.*_S', 'sG_Cue_.*_N', # Variables 10,11,12 + # Variables with only 3 trials per category: + 'sG_Ctx_.*_T','sG_Ctx_.*_S', 'sG_Ctx_.*_N', 'sG_ITI_', # Variable 13, 14, 15, 16 + 'sG_HAB_' ) # Variable 17 [same method as variable 6] + FGT_Variabels_Names <- gsub(x=FGT_Variabels, pattern="_.*_", replacement='_', fixed=TRUE) # Note Fixed true is needed for r to 'interpret' .* as character and not as 'wildcard' + + # SELECT & RESTRUCTURE FOR IMPUTATION ------------------------------------- + restructure_FGTdata <-function(data_restruct, sorttype){ # Function to restructure data + # Make empty lists for the for-loop below + selected_FGT_data_allTrials <- list() + # In de for-loop below are all columns selected + # Select the columns that fit the variables in FGT_Variables vector + for(i in 1:length(FGT_Variabels)){ + this_data <- data_restruct[,(grep(FGT_Variabels[i], names(data_restruct)))] + 
selected_FGT_data_allTrials[[i]] <- this_data + } + # Name elements in lists + names(selected_FGT_data_allTrials) <- FGT_Variabels_Names + # Return variable + return(selected_FGT_data_allTrials) + } + + ## Call restructure script that is defined above ## + ldata <- restructure_FGTdata(data, sorttype) + + + # COUNT & RECODE MISSINGS ------------------------------------------------- + Recode_Missings_FGT<-function(sorttype){ + # Function to count & code missing values in FGT startle data. + + # 1) Specific for the imputation of means (sorttype = "all"): + # Note: Codes 0 = All present (0 missing values), 1 = Less than 1/3 missing ((or in other words if more than 2/3 present), 2 = More than 1/3 missing (or in other words if less than 2/3 present) + # Result: Code 1 creates means based on imputed trials, Code 2 indicates that the mean needs to be imputed directly. + if (sorttype == 'mean') { # If more than 1/3 missing: Code 2 + n.missings <- code.missings <- list() # make empty list + for (i in 1:length(ldata)) { + n.missings[[i]] <- rowSums(is.na(cbind(ldata[[i]][, 1:ncol(ldata[[i]])]) * 1)) # Note n.missing is the number of missing trials PER participant WITHIN one category (e.g. Acquisition threat) + code.missings[[i]] <- + ifelse(n.missings[[i]] > (1/3 * ncol(ldata[[i]])), 2, 1) # [Note: ifelse(test, yes, no)] + code.missings[[i]] <- + ifelse(n.missings[[i]] == 0, 0, code.missings[[i]])} + + # 2) Specific for the imputation of individual trials (sorttype = 'trials'): + # Note: Code 0 = All present (0 missing values); Code 4 = More than 2/3 missing (or in other words if less than 1/3 present) + # Result: Code 4 indicates that all trials will be set to missing; without code 4 (so less than 2/3 missing, or in other words more than 1/3 present) the individual trials are imputed. + } else if (sorttype == 'trials'){ + n.missings <- code.missings <- list() + for (i in 1:length(ldata)) { + n.missings[[i]] <- rowSums(is.na(cbind(ldata[[i]][, 1:ncol(ldata[[i]])]) * 1)) + code.missings[[i]] <- + ifelse(n.missings[[i]] > (2/3 * ncol(ldata[[i]])), 4, 1) # If more than 2/3 missing: Code 4 + code.missings[[i]] <- + ifelse(n.missings[[i]] == 0, 0, code.missings[[i]])} + } + + out <- list(code.missings, n.missings) + names(out) <- c("code", "n") + return(out) + } + + # Call function above. 
Note: `Missings` is a list of two with `Missings[[1]]` = code.missings and `Missings[[2]]` = n.missings + Missings <- Recode_Missings_FGT(sorttype) + + # Save trials & missings in list and as file + data_missings_trials<-list(ldata,Missings$n,Missings$code) + names(data_missings_trials)<-c("ldata","n.missings","code.missings") + saveRDS(data_missings_trials, "processed_data/data_missing_trials.rda") + + + if(sorttype == 'mean'){ # If imputation should create imputed means: + + # CALCULATE MEAN STARTLES ------------------------------------------------- + mean.startle<-matrix(unlist(lapply(ldata,rowMeans,na.rm=T)),nrow=nrow(data)) # True = omit missing values van calculations + mean.startle.completecases<-matrix(unlist(lapply(ldata,rowMeans,na.rm=F)),nrow=nrow(data)) # False = do not omit missing values van calculations [result: if a missing value is present, the mean is missing] + # Create & Assign variable names for means + FGT_Variabels_Names_Means <- paste0("mean_",FGT_Variabels_Names) + colnames(mean.startle) <- FGT_Variabels_Names_Means + colnames(mean.startle.completecases) <- FGT_Variabels_Names_Means + + # SAVE MEANS -------------------------------------------------------------- + #save data in list and in file + data_missings_means<-list(ldata,Missings$n,Missings$code,mean.startle) + names(data_missings_means)<-c("ldata","n.missings","code.missings","mean.startle") + saveRDS(data_missings_means, "processed_data/data_missings_means_all.rda") + + # PREPARE DATA FOR IMPUTATION --------------------------------------------- + # generate dataset with only means based on more than 2/3 of trials (pulses) + for(i in 1:length(ldata)){ + for(j in 1:nrow(data)){ + if(Missings$code[[i]][j]==2){mean.startle[j,i]<-NA} # change the means that are based on less than 2/3 of trials (pulses) (code 2) to NA + } + } + # Add Condition to the matrix with means + mean.startle.c<-cbind(mean.startle,data$Condition) + colnames(mean.startle.c)[length(colnames(mean.startle.c))]<-"Condition" + + } + + + # MAKE PREDICTION MATRIX -------------------------------------------------- + # Note: the imputation on all trials is performed before trials are sorted in categories (so applicable to sorttype = 'trials' and sorttype = 'mean') + pred_allpulses = (1 - diag(1, ncol(data))) + rownames(pred_allpulses) <- colnames(data) + colnames(pred_allpulses) <- colnames(data) + pred_allpulses["Condition", 1:ncol(pred_allpulses)] <- 0 + pred_allpulses[1:ncol(pred_allpulses)-1, "Condition"] <- 1 + + # Prediction matrix for imputation of means: specific for sorttype = 'mean' (not needed if sorttype = 'trials'). 
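+  # (In a mice prediction matrix each row is a variable to be imputed and a 1 means the column variable is
+  #  used as its predictor; as in pred_allpulses above, Condition serves as a predictor for all other
+  #  variables but is not imputed itself.)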
+ if (sorttype == 'mean'){ + pred_means = (1 - diag(1, ncol(mean.startle.c))) + pred_means[18, 1:ncol(pred_means)] <- 0 # The number (18) indicate the variable location + 1 (because condition was added to matrix) + pred_means[1:ncol(pred_means)-1, 18] <- 1 + } + + # IMPUTATION -------------------------------------------------------------- + ## Trials + Startle_imputatie_pulses <- mice(data=data, + pred=pred_allpulses, + m = M, + maxit = MAXIT, + seed=356) + saveRDS(Startle_imputatie_pulses, "startle_pulses_imp") + ## Means + if (sorttype == 'mean'){ + Startle_imputatie_means <- mice(mean.startle.c, + pred=pred_means, + m = M, + maxit = MAXIT, + seed=356) + saveRDS(Startle_imputatie_means, "startle_means_imp") + } + + # MERGE DATA -------------------------------------------------------------- + dataout <- list() # Empty list + for(m in 1:M) { # number of Imputed datasets + + ## Create 3 complete datasets, and add suffix _i to all variable names: + + # 1) Dataset with imputed trials (pulses) (for mean calculations & individual trials dataset) + Imputed_Pulses_complete <- complete(Startle_imputatie_pulses, m,include = F) # Note include = T, would include original data with missing values included. + colnames(Imputed_Pulses_complete) <- paste0(names(Imputed_Pulses_complete),"_i") + + # 2) Only when sorttype = 'mean' + if (sorttype == 'mean'){ + # 2A) Dataset with imputed means + Imputed_Means_complete <- complete(Startle_imputatie_means, m,include = F) + colnames(Imputed_Means_complete) <- paste0(names(Imputed_Means_complete),"_i") + # 2B) Dataset with calculated means based on dataset 1: the imputed trials (pulses) + ldataIMP <- restructure_FGTdata(Imputed_Pulses_complete, sorttype) # Imputed data sorted according to sorttype, using the 'restructure function' + Means_based_on_Imputed_Pulses<-list() # Create empty list + for (i in 1:length(ldataIMP)) { # fill empty mean list + this_mean <- rowMeans( cbind(ldataIMP[[i]]) ,na.rm=TRUE) # Calculate row means for all elements in ldataIMP + Means_based_on_Imputed_Pulses <- as.data.frame(cbind(Means_based_on_Imputed_Pulses, this_mean))} # add row mean to dataframe + # Add column names to dataset 2B + if (sorttype == 'mean'){colnames(Means_based_on_Imputed_Pulses) <- paste0(FGT_Variabels_Names_Means,"_i")} + ## Create Merged means dataset. Note this dataset contains the appropriate mean (either from dataset 2A or 2B), based on the Missing$Code. + mergedmeans <- data.frame (matrix(0,nrow(data), length(ldata))) # Make empty matrix + # Add column names + suffix 'merged' + if (sorttype == 'mean'){colnames(mergedmeans) <- paste0(FGT_Variabels_Names_Means,"_merged")} + # Fill the `mergedmeans` matrix with the appropriate data, based on missing codes. + for(i in 1:length(ldata)){ + for(j in 1:nrow(data)){ + if(Missings$code[[i]][j]==1){mergedmeans[j,i]<-Means_based_on_Imputed_Pulses[j,i]} # if less than 1/3 Missing (or 1/3 missing) + if(Missings$code[[i]][j]==0){mergedmeans[j,i]<-Imputed_Means_complete[j,i]} # 0 missing + if(Missings$code[[i]][j]==2){mergedmeans[j,i]<-Imputed_Means_complete[j,i]} # if more than 1/3 missing + } + } + dataout[[m]] <- mergedmeans + + # 3) Only when sorttype = 'trials' + } else if (sorttype == 'trials'){ + # 3A) dataset with separate imputed trials, corrected for the `2/3 missing rule`, for analyses with "trials" as factor. 
+ # Restructure imputed pulses (trials) (of imputed dataset m) + ldataIMP <- restructure_FGTdata(Imputed_Pulses_complete, sorttype) # Imputed data sorted according to sorttype, using the 'restructure function' (output in list) + # copy data to new list before transformations + ldataIMP_corrected <- ldataIMP + # Loop over all variable categories (the different elements in ldataIMP, 17 in total), e.g. threat acquisition, safe acquisition etc. + for (i in 1:length(ldataIMP)){ # length ldataIMP = 17 + # Loop per variable category over all participants + for (j in 1:nrow(data)){ # nrow(data) = 117 + # Check per variable category i for each participant j if the data (in that variable category) needs recoding. + # Missing Codes: 0 = All present, 4 = More than 2/3 missing + if(Missings$code[[i]][j]==4){ # when more than 2/3 of the trials in this category is missing + ldataIMP_corrected[[i]][j,] <- rep(NA,length(ldataIMP[[i]])) # rownumber j in dataset i, replaced by NA's for all trials (= length ldataIMP[[i]]) + # For detailed checking of missing codes (if required) + # Missings$code[[1]]# a vector with a missing code for each participant in Threat acquisition (dataset 1) trials + # Missings$code[[1]][4] # a vector with a missing code for participant 4 in Threat acquisition (dataset 1) trials + } + } + } + + # 3B Convert list, to dataframe + All.Trials.MissingCorrected<-cbind(data.frame(matrix(unlist(ldataIMP_corrected),nrow = 117)), Imputed_Pulses_complete$Condition_i) + colnames(All.Trials.MissingCorrected) <- colnames(Imputed_Pulses_complete) # add names + # Store in output list + dataout[[m]]<-All.Trials.MissingCorrected + } + + } + + + # CREATE OUTPUT ----------------------------------------------------------- + if (sorttype != 'trials'){ + out<-list(dataout,mean.startle.completecases) + names(out)<-c("MergedMeans_imputed","Means_Completecases") + } else if (sorttype == 'trials'){ + out<-list(dataout, data) + names(out)<-c("Trials_imputed","Trials_Original") + } + + # Save output in the processed_data folder + save(out, file = 'processed_data/OUPUT_IMPUTATIE_FGT_out_endofscript.rda') # Imputed datasets + + return(out) + +} diff --git a/R/FPS_mids_Quality.R b/R/FPS_mids_Quality.R new file mode 100644 index 0000000..409737d --- /dev/null +++ b/R/FPS_mids_Quality.R @@ -0,0 +1,271 @@ +# Create MIDS-objects from the imputed startle data (EMG) of the Fear Generalization Task (FGT) in the SAM study, and check the imputation quality. +# Written by Milou Sep + +# This script can be used to create MIDS objects (a datatype form the mice package) from the output of the FGT imputation script: "FPS_imputation.R" +# The central function in this script is "MAKE_MIDS_Imputed_FGT_EMG_SAM", in which the data is sorted, factors are created and mids objects is formed +# Note This script works with both output of sorttype 'mean' and 'trials' and always creates separate mids objects for day1 and day2. + +# For information on the imputation quality checks: https://www.kaggle.com/captcalculator/imputing-missing-data-with-the-mice-package-in-r. 
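+# Below, imputation quality is inspected per outcome with mice's densityplot() and bwplot() on the mids
+# objects: both compare the distribution of the observed values with the distributions of the imputed values
+# across the imputed datasets (imputations are considered acceptable when these distributions are similar).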
+ +# * Load required packages -------------------------------------------------- +# install.packages("mice"); install.packages("car"); utils::install.packages("miceadds", dependencies = TRUE); install.packages("reshape2") +library(mice) +library(car) +library(miceadds) +library(reshape2) +library(stringr) # Split variable name in multiple columns (info from: https://stackoverflow.com/questions/4350440/split-data-frame-string-column-into-multiple-columns) + +# * Load required files ----------------------------------------------------- +# Load raw data (umag): +load(file = 'processed_data/INPUT_IMPUTATIE_FGT_Umag.rda', envir = .GlobalEnv, verbose = FALSE) +# Load imputed data umag (Note M=100 and MAXIT=50): +load(file = 'processed_data/OUPUT_IMPUTATIE_FGT_out_M50_MAXIT100_Umag_Trials.rda', envir = .GlobalEnv, verbose = FALSE) +load(file= "processed_data/OUPUT_IMPUTATIE_FGT_out_M50_MAXIT100_Umag_AllMeans.rda", envir = .GlobalEnv, verbose = FALSE) + +# Check amount of missing data -------------------------------------------- +summary(Umag) +n.missing<-sum(is.na(Umag)) +# total number of observations +n.observations<-nrow(Umag)*(ncol(Umag)-2) # Note: -2 to remove subject en condition variables from the count +# percent.missing +(n.missing/n.observations)*100 # = 10.39886 + +# Number of missing values in the imputed trials datasets +(sum(is.na(out_Umag_trials$Trials_imputed[[1]]) ) / n.observations) *100 # = 7,13141 +summary(out_Umag_trials$Trials_imputed[[1]]) + +# Number of missing values in the imputed means (based on all trials) datasets +summary(out_Umag_all$Means_Completecases) # Note data with missing values +aantal_missings_m.all<-sum(is.na(out_Umag_all$Means_Completecases)) +observations_m.all <- nrow(out_Umag_all$Means_Completecases) * ncol(out_Umag_all$Means_Completecases) +(aantal_missings_m.all/observations_m.all) * 100 # = 24.13273% +# To check +summary(out_Umag_all$MergedMeans_imputed[[1]]) # imputed data, no missing values + + +# Definition of Function Make Mids" -------------------------------------- +MAKE_MIDS_Imputed_FPS <- function(input_data, output_data, M, var_pattern, within.factors, number.trialtypes, iti.mean){ + # This function can be used to select FPS responses to cue or context stimuli in the acquisition and test phase of the FGT task + # Selected data is changed to long format and factors trialtype and trial will be added to the dataset based on original variable names + # Note 1: number.trialtypes is 2 if only threat and safe trials are present and 3 if threat, safe and new trials are present. + # Note 2: M is the number of imputations + + # Make data.frame with (Subject Name) & (experimental) Condition + Subject_Condition <- data.frame(factor(input_data$subject), + factor(input_data$Condition, levels=c("1","2","3"), + labels=c("Delayed Stress", "Direct Stress", "No Stress"))) + colnames(Subject_Condition) <- c("pp", "Condition") + + # To check if condition row's are identical (if they are both 'factor') + # identical(factor(Subject_Condition$Smag.Condition), factor(Subject_Condition$output_data..1....m...Condition_i)) # Yes, so both can be used in later dataset + + Data_List<-list() # Define empty list (for main mids object) + + for(m in 0:M){ # Loop over m is 1 t/m 101 (including the complete cases dataset, the original data, and 100 imputed datasets) + + # Select either Complete Case data (if m is 0) ... + if (m==0){ Data <- output_data[[2]][ ,grep(pattern=var_pattern, x=names(data.frame(output_data[[2]])))] # Note data.frame() required for names(). + # ... 
or a imputed datasets (if m is NOT 0). + } else if (m!=0) {Data <- output_data[[1]][[m]][ ,grep(pattern=var_pattern, x=names(output_data[[1]][[m]]))]} + + # Bind data with Subject name & Experimental condition... + Data <- cbind(Subject_Condition, Data) + # ... and change data to long format + Data_Long <- melt(Data, id.vars = c("pp", "Condition"), value.name = 'FPS') + + + # If the individual trials are analyzed: iti.mean = 0 (so no factors need to be created) + if(iti.mean == 0){ + + # Split the variable name in 5 elements on "_", add the 4th element that is created (e.g. "T1" or "S4") as variable "TrialCode" to `datalong`... + variable.codes<-data.frame(stringr::str_split_fixed(string=Data_Long$variable, pattern="_",n=5)) + ## To check results of 'str_split_fixed' for cue acquisition trials: + # unique(variable.codes[,1]) # sA [ for cue acq all & EL: Mean] + # unique(variable.codes[,2]) # cue [ for cue acq all & EL: sA] + # unique(variable.codes[,3]) # Umag [ for cue acq all & EL: cue] + # unique(variable.codes[,4]) # T1, T2, T3, ..etc S1, S2, S3 etc. [ for cue acq all & EL: T & S, for ITI E&L is this empty] + # unique(variable.codes[,5]) # i [for cue acq all: merged, for cue acq EL: contains E&L, for ITI EL this is E&L} + + # ... and trialtype too: + if("trialtype" %in% within.factors){ + # If both trialtype and trial are BOTH within factors, which is only the case in datasets for analyses + # of the acquisition and test phase of cued and contextual FPS, variable.code 4 needs to be divided in letters & digits + # (for info see https://stackoverflow.com/questions/9756360/split-character-data-into-numbers-and-letters) + # Numbers (recognized by pattern: "[[:digit:]]") are replaced by nothing (""), as a result only letters remain. + Data_Long$trialtype<-factor(gsub("[[:digit:]]","", variable.codes[,4])) # Note: gsub(pattern, replacement, x) + # Idem, only here are 'not numbers' (so the letters) replaced by nothing (""). + Data_Long$trial<-factor(gsub("[^[:digit:]]","", variable.codes[,4])) + + # If trialtype is a within factor, recode trialtypes Threat, Safe & New + # Recode trialtype & make factor + if (number.trialtypes == 2){ + # Recode T en S to 0 en 1 + Data_Long$trialtype<-recode(var=Data_Long$trialtype, recodes="'T'= 0; 'S'= 1") + # Make trialtype factor. + Data_Long$trialtype <- factor(Data_Long$trialtype, levels = c("0","1"), labels = c("Threat", "Safe")) + } else if (number.trialtypes == 3) { + # Recode T en S en N to 0 en 1 en 2 + Data_Long$trialtype<-recode(var=Data_Long$trialtype, recodes="'T'= 0; 'S'= 1; 'N'=2") + # Make trialtype factor. + Data_Long$trialtype <- factor(Data_Long$trialtype, levels = c("0","1", "2"), labels = c("Threat", "Safe", "New")) + } + + # ... and if trialtype is NO within factor: + } else if (!("trialtype" %in% within.factors)){ + # If trialtype is not a within factor (and no means are analyzed), which is the case in analyses of HAB & ITI trials: + Data_Long$trial<-factor(variable.codes[,4]) + } + + } + + # To check the results of the data manipulations + # str(Data_Long) + # unique(Data_Long$trialtype) + + Data_List[[m+1]] <- Data_Long # Note m+1 is needed because M starts at 0 in this loop, but 0 does not indicate a position in the list. + } + + # Note do.call applies rbind() to on all list elements (M+1) + Data_Frame <- do.call(rbind,Data_List) + # Add column's .imp en .id (both necessary to make mids object, via as.mids()! + # Add code .imp (required to indicate the number of imputed data set, 0 = with missing values, 1 = imputed dataset 1 etc. 
(M)) + Data_Frame$.imp <- factor(c(rep(0:M,each=c(nrow(Data_List[[1]]))))) + # Add .id column (that indicates the row in the dataset) + Data_Frame$.id <- factor(c(1:nrow(Data_Frame))) + + # Create mids object(s) for overall dataframe.. + mids.data<-as.mids(long=Data_Frame, .imp=".imp", .id=".id") + + # Define Function output + # return(Data_List) # list with M+1 elements (dataframes), the first is the original data (with missing values) + # return(Data_Frame) # rows from the M+1 dataframes below each other in one dataframe + return(mids.data) # mids object of variable "Data_Frame" +} + +# Call script above, per trial type: + +# Acquisition Cued FPS ---------------------------------------------------- +Acquisition_Cued_FPS_mids_Trials <- MAKE_MIDS_Imputed_FPS( + input_data=Umag,output_data = out_Umag_trials, M=100, var_pattern = 'sA_Cue', + within.factors=c("trial", "trialtype"), number.trialtypes=2, iti.mean=0) +# Check quality imputations +print(densityplot(Acquisition_Cued_FPS_mids_Trials, ~FPS)) +bwplot(Acquisition_Cued_FPS_mids_Trials) +# MS & RG: OK + +# Acquisition Context FPS ------------------------------------------------- +Acquisition_Context_FPS_mids_Trials <- MAKE_MIDS_Imputed_FPS( + input_data=Umag, output_data = out_Umag_trials, M=100, var_pattern = 'sA_Ctx', + within.factors=c("trial", "trialtype"), number.trialtypes=2, iti.mean=0) +print(densityplot(Acquisition_Context_FPS_mids_Trials, ~FPS)) +bwplot(Acquisition_Context_FPS_mids_Trials) +# MS & RG: OK + +# Context Habituation ----------------------------------------------------- +HABctx_FPS_mids_Trials <- MAKE_MIDS_Imputed_FPS( + input_data=Umag, output_data = out_Umag_trials, M=100, var_pattern = 'sA_HABctx', + within.factors=c("trial", "trialtype"), number.trialtypes=3, iti.mean=0) +# No imputations for this trialtype. +# print(densityplot(HABctx_FPS_mids_Trials, ~FPS)) +# bwplot(HABctx_FPS_mids_Trials) +summary(complete(HABctx_FPS_mids_Trials)) # 72 missings (12 participants, 3 categories, 3trials per category) +# 12*2*3 +summary(Umag) # for 12 participants are all habctx trials missing (= more then 2/3 missing), so no imputations were performed. + +# Test Cued FPS ----------------------------------------------------------- +Test_Cued_FPS_mids_Trials <- MAKE_MIDS_Imputed_FPS( + input_data=Umag, output_data = out_Umag_trials, M=100, var_pattern = 'sG_Cue', + within.factors=c("trial", "trialtype"), number.trialtypes=3, iti.mean=0) +print(densityplot(Test_Cued_FPS_mids_Trials, ~FPS)) +bwplot(Test_Cued_FPS_mids_Trials) # Note, some high values in the original data, less in the imputed data. +# MS & RG: OK + +# Test Context FPS -------------------------------------------------------- +Test_Context_FPS_mids_Trials <- MAKE_MIDS_Imputed_FPS( + input_data=Umag, output_data = out_Umag_trials, M=100, var_pattern = 'sG_Ctx', + within.factors=c("trial", "trialtype"), number.trialtypes=3, iti.mean=0) +print(densityplot(Test_Context_FPS_mids_Trials, ~FPS)) +bwplot(Test_Context_FPS_mids_Trials) +# MS & RG: OK + +# Acquisition ITI (Trials)--------------------------------------------------------- +Acquisition_ITI_FPS_mids_Trials <- MAKE_MIDS_Imputed_FPS( + input_data=Umag,output_data = out_Umag_trials, M=100, var_pattern = 'sA_ITI_', + within.factors=c("trial"), number.trialtypes=0, iti.mean=0) +print(densityplot(Acquisition_ITI_FPS_mids_Trials, ~FPS )) +bwplot(Acquisition_ITI_FPS_mids_Trials) +# MS & RG: lot of variation between imputations (= quality insufficient), analyze imputed means of ITI (that have good quality imputations). 
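+# Because the trial-level imputations for the acquisition ITI probes were judged insufficient, the imputed
+# ITI means (sorttype = 'mean') are used instead; only the ITI means are added to the final Umag.mids list below.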
+ +# Acquisition ITI (Mean) +Acquisition_ITI_FPS_mids_MeanAll <- MAKE_MIDS_Imputed_FPS( +input_data=Umag, output_data=out_Umag_all, M=100, var_pattern = "sA_ITI", +within.factors=c(0), number.trialtypes=0, iti.mean=1) +print(densityplot(Acquisition_ITI_FPS_mids_MeanAll, ~FPS )) +bwplot(Acquisition_ITI_FPS_mids_MeanAll) +# MS & RG: OK + +# Test ITI ---------------------------------------------------------------- +Test_ITI_FPS_mids_Trials <- MAKE_MIDS_Imputed_FPS( + input_data=Umag,output_data = out_Umag_trials, M=100, var_pattern = 'sG_ITI_', + within.factors=c("trial"),number.trialtypes=0, iti.mean=0) +# No imputations for this trialtype +# print(densityplot(Test_ITI_FPS_mids_Trials, ~FPS )) +# bwplot(Test_ITI_FPS_mids_Trials) +summary(Test_ITI_FPS_mids_Trials) # for 3 participants, all ITI's were missing. This is more than 2/3, so no imputations were performed. +summary(Umag) + +# Test ITI (Mean) +Test_ITI_FPS_mids_MeanAll <- MAKE_MIDS_Imputed_FPS( + input_data=Umag, output_data=out_Umag_all, M=100, var_pattern = "sG_ITI", + within.factors=c(0), number.trialtypes=0, iti.mean=1) +print(densityplot(Test_ITI_FPS_mids_MeanAll, ~FPS )) +bwplot(Test_ITI_FPS_mids_MeanAll) +# MS & RG: OK + +# pre-Acquisition Habituation --------------------------------------------- +Acquisition_HAB_FPS_mids_Trials <- MAKE_MIDS_Imputed_FPS( + input_data=Umag, output_data = out_Umag_trials, M=100, var_pattern = 'sA_HAB_', + within.factors=c("trial"),number.trialtypes=0, iti.mean=0) +print(densityplot(Acquisition_HAB_FPS_mids_Trials, ~FPS )) +bwplot(Acquisition_HAB_FPS_mids_Trials) +# MS & RG: OK + +# pre-Test Habituation ---------------------------------------------------- +Test_HAB_FPS_mids_Trials <- MAKE_MIDS_Imputed_FPS( + input_data=Umag, output_data = out_Umag_trials, M=100, var_pattern = 'sG_HAB_', + within.factors=c("trial"),number.trialtypes=0, iti.mean=0) +print(densityplot(Test_HAB_FPS_mids_Trials, ~FPS )) +bwplot(Test_HAB_FPS_mids_Trials) +# MS & RG: OK + + +# CREATE OUTPUT ----------------------------------------------------------- + +# Put elements that can be analyses (so not imputed ITI trials) in a list and save +Umag.mids <-list( + Acquisition_HAB_FPS_mids_Trials, + HABctx_FPS_mids_Trials, + Acquisition_Cued_FPS_mids_Trials, + Acquisition_Context_FPS_mids_Trials, + # Acquisition_ITI_FPS_mids_Trials, + Acquisition_ITI_FPS_mids_MeanAll, + Test_ITI_FPS_mids_MeanAll, + # Test_ITI_FPS_mids_Trials, + Test_HAB_FPS_mids_Trials, + Test_Cued_FPS_mids_Trials, + Test_Context_FPS_mids_Trials) + +names(Umag.mids) <- c( + "Acq_HAB_Trials", + "Acq_HABCTX_Trials", + "Acq_Cued_Trials", + "Acq_Context_Trials", + # "Acq_ITI_Trials", + "Acq_ITI_Means", + "Test_ITI_Means", + # "Test_ITI_Trials", + "Test_HAB_Trials", + "Test_Cued_Trials", + "Test_Context_Trials") + +save(Umag.mids, file = 'processed_data/Umag.mids.28.01.19.rda') # Sorted Mids objects diff --git a/README.md b/README.md new file mode 100644 index 0000000..5cd2ea8 --- /dev/null +++ b/README.md @@ -0,0 +1,52 @@ +# SAM_FGT +Data Imputation and Analyses of Fear Generalization Task in SAM study, as described in: +- Sep, M.S.C., Gorter, R., van Ast, V.A., Joƫls, M., & Geuze, E. (2019) No Time-Dependent Effects of Psychosocial Stress on Fear Contextualization and Generalization: A Randomized-Controlled Study With Healthy Participants. Chronic Stress, 3, 247054701989654. 
https://doi.org/10.1177/2470547019896547
+
+#### Step 1: Data
+Datasets (available on Dataverse):
+- `SAM_FGT.csv` contains fear-potentiated startle responses (FPS) during the FGT task
+- `SAM_FGT_Amperage.csv` contains shock intensities used in the conditioning phase of the FGT, per participant.
+- `SAM_questionnaires.sav` contains questionnaire information (including fear contingency scores)
+- `SAM_Codes_Task_Protocol_Versions.csv` contains information on the task versions, per participant
+
+#### Step 2: Descriptive Statistics
+`FGT_descriptives.R` (in the 'R' folder) loads:
+- `SAM_FGT.csv` and `SAM_FGT_Amperage.csv` for the description of shock intensities and missing values.
+- `SAM_questionnaires.sav` and `SAM_Codes_Task_Protocol_Versions.csv` to prepare data for sensitivity analyses with fear contingency scores (saved as `participants.for.contingency.sensitivity.analyses.rda` in the folder `processed_data`)
+
+#### Step 3: Multiple Imputation (MI) of FPS
+The `FPS_imputation.R` script (in the 'R' folder) loads `SAM_FGT.csv` and prepares the data for multiple imputation (the cleaned data is saved as `INPUT_IMPUTATIE_FGT_Umag.rda` in the folder `processed_data`).
+
+Perform MI via the function `Impute_FGT_EMG_SAM` to:
+ 1) **impute individual trials** (set `sorttype` to 'trials').
+    - Note: trials will be imputed *if more than 1/3 of the trials in a category is present* (in other words, if <2/3 is missing); if less than 1/3 is present (in other words, if >2/3 is missing; `missing code 4`), all the trials for that category will be set to missing.
+ 2) **create imputed means** (set `sorttype` to 'mean').
+    - Note: means will be based on imputed trials if *more than 2/3 of the trials is present* (in other words, if <1/3 is missing; `missing code 1`), or imputed directly if less than 2/3 of the trials is present (in other words, *if >1/3 is missing*; `missing code 2`).
+
+The **imputed datasets** are saved in the folder `processed_data` as:
+ 1) `OUPUT_IMPUTATIE_FGT_out_M50_MAXIT100_Umag_AllMeans.rda`
+ 2) `OUPUT_IMPUTATIE_FGT_out_M50_MAXIT100_Umag_Trials.rda`
+
+Note, the function `Impute_FGT_EMG_SAM` also saves the output of the script automatically with a generic name (`OUPUT_IMPUTATIE_FGT_out_endofscript.rda`) in the folder `processed_data`.
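+
+For reference, the two imputation calls as used in `FPS_imputation.R` (`Umag` holds the unstandardized startle magnitudes; `M = 100`, `MAXIT = 50`):
+
+```r
+M <- 100      # number of imputed datasets
+MAXIT <- 50   # iterations per imputation
+
+# 1) impute individual trials
+out_Umag_trials <- Impute_FGT_EMG_SAM(data = Umag, M = M, MAXIT = MAXIT, sorttype = 'trials')
+# 2) impute means (used for the inter-trial intervals)
+out_Umag_all <- Impute_FGT_EMG_SAM(data = Umag, M = M, MAXIT = MAXIT, sorttype = 'mean')
+```
+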
+#### Step 4: MI quality checks & data transformations
+- `FPS_mids_Quality.R` (in the 'R' folder) creates a sorted `mids` object (`Umag.mids.28.01.19.rda`) from the imputed data in the folder `processed_data`, which is used for the analyses.
+
+#### Step 5: Assumptions of linear mixed effects models (LMM):
+The assumptions for the LMM analyses are checked within each imputed dataset via `FPS_LMM_Assumptions_call.R` (in the 'R' folder). Note, this script loads `Umag.mids.28.01.19.rda` and renders the script `FPS_LMM_Assumptions_source.R` to an R Markdown file for each outcome measure (files will appear in the 'R' folder).
+
+#### Step 6: LMM analyses:
+The LMM analyses are performed within each imputed dataset via `FPS_LMM_Analyses_call.R` (in the 'R' folder). This script loads `Umag.mids.28.01.19.rda` and sources:
+
+1) scripts with **analysis functions**:
+   - `FPS_LMM_Trialtype.Trial.Condition.r`
+   - `FPS_LMM_trialtype.Condition.r`
+   - `FPS_LMM_trial.Condition.R`
+   - `FPS_LM_Condition.r`
+
+2) a script to pool (and plot) **LMM estimates**: `FPS_LMM_pool_EMM.r`
+3) a script to **transform `mids` objects**: `FGT_mids_transformations.R`
+4) a script to **export results**: `FPS_LMM_Results_to_Word.R`
+
+#### Step 7: Data Visualization:
+- Figures: `FPS_LMM_Results_to_Plot.R`
diff --git a/SAM_FGT.Rproj b/SAM_FGT.Rproj
new file mode 100644
index 0000000..43ea599
--- /dev/null
+++ b/SAM_FGT.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: No
+SaveWorkspace: No
+AlwaysSaveHistory: No
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: ASCII
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
diff --git a/data/README.md b/data/README.md
new file mode 100644
index 0000000..07eb636
--- /dev/null
+++ b/data/README.md
@@ -0,0 +1 @@
+Data can be downloaded from Dataverse
diff --git a/processed_data/README.md b/processed_data/README.md
new file mode 100644
index 0000000..1cf516e
--- /dev/null
+++ b/processed_data/README.md
@@ -0,0 +1 @@
+Processed data will be saved here
diff --git a/results/README.md b/results/README.md
new file mode 100644
index 0000000..03cd2f2
--- /dev/null
+++ b/results/README.md
@@ -0,0 +1 @@
+Results, tables and figures will be saved here