-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathcreate-pair-links-double-entered.R
128 lines (118 loc) · 5.71 KB
/
create-pair-links-double-entered.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#' @name CreatePairLinks
#'
#' @aliases
#' CreatePairLinksSingleEntered
#' CreatePairLinksDoubleEntered
#' CreatePairLinksDoubleEnteredWithNoOutcomes
#'
#' @export CreatePairLinksSingleEntered CreatePairLinksDoubleEntered CreatePairLinksDoubleEnteredWithNoOutcomes
#'
#' @title Creates a pairs linking file.
#'
#' @description Creates a linking file for BG designs using this file structure (e.g., DF analysis, other ACE modeling).
#' DF analysis requires a double-entered file that contains the `R`
#' value for the pair, and their two outcome variable values.
#'
#' [CreatePairLinksDoubleEnteredWithNoOutcomes()] is intended to be a
#' primarily a helper function for [CreateSpatialNeighbours()].
#'
#' @usage
#'
#' CreatePairLinksDoubleEntered(
#' outcomeDataset,
#' linksPairDataset,
#' outcomeNames,
#' linksNames = c("ExtendedID", "R", "RelationshipPath"),
#' validateOutcomeDataset = TRUE,
#' subject1Qualifier = "_S1",
#' subject2Qualifier = "_S2"
#' )
#'
#' CreatePairLinksSingleEntered(
#' outcomeDataset,
#' linksPairDataset,
#' outcomeNames,
#' linksNames = c("ExtendedID", "R", "RelationshipPath"),
#' validateOutcomeDataset = TRUE,
#' subject1Qualifier = "_S1",
#' subject2Qualifier = "_S2"
#' )
#'
#' CreatePairLinksDoubleEnteredWithNoOutcomes(
#' linksPairDataset,
#' linksNames = c("ExtendedID", "R", "RelationshipPath")
#' )
#'
#' @param outcomeDataset A data frame containing the outcome variable(s)
#' @param linksPairDataset A data frame containing the `SubjectTag`s of
#' each subject in the pair and their `R` coefficient.
#' @param outcomeNames The column names of the outcome variable(s)
#' @param linksNames The column names desired to be present in the newly
#' created data frame. `SubjectTag_S1` and `SubjectTag_S2` are included
#' automatically.
#' @param validateOutcomeDataset Indicates if characteristics of the outcomeDataset should be validated.
#' @param subject1Qualifier Indicates how the outcome variable for the pair's first subject is distinguished from the other subject. The default is `_S1`.
#' @param subject2Qualifier Indicates how the outcome variable for the pair's second subject is distinguished from the other subject. The default is `_S2`.
#'
#' @author Will Beasley
#'
#' @references For more information about a DF analysis, see Rodgers, Joseph Lee, & Kohler, Hans-Peter (2005).
#' [Reformulating and simplifying the DF analysis model.](https://pubmed.ncbi.nlm.nih.gov/15685433/)
#' *Behavior Genetics, 35* (2), 211-217.
#'
#' @examples
#' dsSingleLinks <- data.frame(
#' ExtendedID = c(1, 1, 1, 2),
#' SubjectTag_S1 = c(101, 101, 102, 201),
#' SubjectTag_S2 = c(102, 103, 103, 202),
#' R = c(.5, .25, .25, .5),
#' RelationshipPath = rep("Gen2Siblings", 4)
#' )
#' dsSingleOutcomes <- data.frame(
#' SubjectTag = c(101, 102, 103, 201, 202),
#' DV1 = c(11, 12, 13, 41, 42),
#' DV2 = c(21, 22, 23, 51, 52)
#' )
#' dsDouble <- CreatePairLinksDoubleEntered(
#' outcomeDataset = dsSingleOutcomes,
#' linksPairDataset = dsSingleLinks,
#' outcomeNames = c("DV1", "DV2"),
#' validateOutcomeDataset = TRUE
#' )
#' dsDouble # Show the 8 rows in the double-entered pair links
#' summary(dsDouble) # Summarize the variables
#'
#' ValidatePairLinksAreSymmetric(dsDouble) # Should return TRUE.
CreatePairLinksDoubleEntered <- function(
outcomeDataset, linksPairDataset, outcomeNames,
linksNames = c("ExtendedID", "R", "RelationshipPath"), validateOutcomeDataset = TRUE,
subject1Qualifier = "_S1", subject2Qualifier = "_S2") {
ValidatePairLinks(linksPairDataset)
if (validateOutcomeDataset) {
ValidateOutcomeDataset(dsOutcome = outcomeDataset, outcomeNames = outcomeNames)
}
dsLinksLeftHand <- base::subset(linksPairDataset, select = c("SubjectTag_S1", "SubjectTag_S2", linksNames)) #' Lefthand' is my slang for Subjec1Tag is less than the SubjectTag_S2
dsLinksRightHand <- base::subset(linksPairDataset, select = c("SubjectTag_S1", "SubjectTag_S2", linksNames))
base::colnames(dsLinksRightHand)[base::colnames(dsLinksRightHand) == "SubjectTag_S1"] <- "SubjectTempTag"
base::colnames(dsLinksRightHand)[base::colnames(dsLinksRightHand) == "SubjectTag_S2"] <- "SubjectTag_S1"
base::colnames(dsLinksRightHand)[base::colnames(dsLinksRightHand) == "SubjectTempTag"] <- "SubjectTag_S2"
dsOutcomeSubject1 <- base::subset(outcomeDataset, select = c("SubjectTag", outcomeNames))
dsOutcomeSubject2 <- base::subset(outcomeDataset, select = c("SubjectTag", outcomeNames))
for (j in seq_along(dsOutcomeSubject1)) {
columnName <- base::colnames(dsOutcomeSubject1)[j]
if (columnName %in% outcomeNames) {
base::colnames(dsOutcomeSubject1)[colnames(dsOutcomeSubject1) == columnName] <- base::paste0(columnName, subject1Qualifier)
base::colnames(dsOutcomeSubject2)[colnames(dsOutcomeSubject2) == columnName] <- base::paste0(columnName, subject2Qualifier)
}
}
dsLeftHand <- base::merge(x = dsLinksLeftHand, y = dsOutcomeSubject1, by.x = "SubjectTag_S1", by.y = "SubjectTag", all.x = TRUE)
dsLeftHand <- base::merge(x = dsLeftHand, y = dsOutcomeSubject2, by.x = "SubjectTag_S2", by.y = "SubjectTag", all.x = TRUE)
dsRightHand <- base::merge(x = dsLinksRightHand, y = dsOutcomeSubject2, by.x = "SubjectTag_S2", by.y = "SubjectTag", all.x = TRUE)
dsRightHand <- base::merge(x = dsRightHand, y = dsOutcomeSubject1, by.x = "SubjectTag_S1", by.y = "SubjectTag", all.x = TRUE)
base::rm(dsLinksLeftHand, dsLinksRightHand, dsOutcomeSubject1, dsOutcomeSubject2)
ds <- base::rbind(dsLeftHand, dsRightHand) #' RowBind' the two datasets
firstTwoNames <- c("SubjectTag_S1", "SubjectTag_S2")
remaining <- base::setdiff(colnames(ds), firstTwoNames)
ds <- ds[, c(firstTwoNames, remaining)]
return(ds)
}