-
Notifications
You must be signed in to change notification settings - Fork 0
/
centerfactor.R
121 lines (105 loc) · 4.1 KB
/
centerfactor.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
centerfactor <- function(x,referencelevels=1:(numlevels-1)) {
# Centers the Helmert coding of a factor and returns the
# appropriate contrast coding
#
# sample use:
# contrasts(Alice$BEFOREKP) = centerfactor(Alice$BEFOREKP)
#
# The 2nd parameter, REFERENCELEVELS, controls which level is the
# reference level for each of the contrasts. There should be
# k-1 reference levels, where k is the number of levels of the factor.
#
# Reference levels may either be specified with the numerical indices
# of the factor levels, or the names of the levels.
#
# Suppose the three levels of MyData$CUETYPE were 'InvalidCue',
# 'NoCue', and 'ValidCue' (N.B. R puts factor levels in alphabetical
# order by default).
# Then you could do:
# contrasts(MyData$CUETYPE) = centerfactor(MyData$CUETYPE,c(2,1))
# or:
# contrasts(MyData$CUETYPE) = centerfactor(MyData$CUETYPE,
# c('NoCue','InvalidCue'))
#
# Contrast 1 would compare ValidCue and InvalidCue against NoCue,
# and Contrast 2 would then compare ValidCue against InvalidCue
#
# The reference level always get the negative weight. This decision
# DOES NOT affect your statistical tests, just the sign
# of the parameter estimate.
#
# DISCLAIMER: I've used this on a number of my datasets and it works
# properly, but I'm sure I haven't tested it on every possible design.
# It would be a good idea to check the contrast codes set by the
# function to make sure they are sensible.
#
# 06.19.10 - S.Fraundorf - first version
# 08.08.10 - S.Fraundorf - excludes NAs. more efficient
# processing of default value
# 08.12.10 - S.Fraundorf - bugfixes, supports factor w/ 3 levels!
# 12.21.10 - S.Fraundorf - supports any # of levels! can take
# level NAMES as input. checks to make
# sure the right # of ref. levels specified
# 01.31.11 - S.Fraundorf - fixed a bug when there were >2 levels and
# each had a different number of observations
# 07.12.11 - S.Fraundorf - return error w/ >3 levels since I haven't
# got this working
# if this is not a factor
if (is.factor(x) == FALSE) {
message = paste('converted into a factor from',class(x),sep=' ')
warning(message)
x = as.factor(x)
}
# remove NA values
x <- na.omit(x)
# get number of levels
numlevels = length(levels(x))
numcontrasts = numlevels - 1
# get the number of observation at each level
numobs <- summary(x)
# get the TOTAL number of observations
totalobs <- sum(numobs)
# right now, only supports up to 3 levels
if (numlevels > 3) {
stop('centerfactor only supports up to 3 levels right now')
}
# check to make sure the right number of reference levels were specified
if (length(referencelevels) != numcontrasts) {
# if not, display error message and stop
message = paste('Wrong number of reference levels! There are',
numcontrasts, 'contrast(s) needed, but', length(referencelevels),
'reference level(s) specified', sep= ' ')
stop(message)
}
# convert the reference levels from characters to numbers, if needed
if (is.character(referencelevels)) {
newreflevels = vector(length=numcontrasts)
for (i in 1:numcontrasts) {
matches = which(levels(x) == referencelevels[i])
if (length(matches) == 0) {
# no matches
message = paste('Factor does not have a level named',
referencelevels[x], sep = ' ')
stop(message)
}
newreflevels[i] <- matches[1]
}
referencelevels <- newreflevels
}
# set up the contrast matrix
mymatrix = matrix(nrow=numlevels,ncol=numcontrasts)
# do each contrast
for (i in 1:numcontrasts) {
# previous reference levels - to be zeroed out
prevreferencelevels <- referencelevels[1:i-1]
mymatrix[prevreferencelevels,i] <- 0
# current reference level - always a -.5
mymatrix[referencelevels[i],i] <- -.5
# positive weighted levels:
mymatrix[is.na(mymatrix[,i]),i] <- .5 # was <- posweight
# mean-center
contrastmean <- (sum(numobs * mymatrix[,i]))/totalobs
mymatrix[,i] <- mymatrix[,i] - contrastmean
}
return(mymatrix)
}