-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2016pifgi2_timelapse3.Rmd
218 lines (194 loc) · 9.77 KB
/
2016pifgi2_timelapse3.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
---
title: "2016-2017 pifgi2 timelapse"
author: "Kazu Nozue"
date: "Feb_9,2017"
output: html_document
---
# pif3 gi2 hypocotyl timelapse growth analyis
# gi2 pif4 pif5 hyp (second experiments; exp4, 5, 7)
## written by Kazunari Nozue (Maloof lab)
## collaboration with Marian (Kay lab)
### history
* for exp4, exp5, and exp6 (pif4/5 vs gi2) (Dec 2016;Jan 12, 2017
### outline
### prerequisite
```{r}
library(ggplot2)
# functions used in this script
## vlookup
#Vlookup in R (https://gist.github.com/jnmaloof/7367450)
#Version 0.3 November 12, 2013
#Return senesical results if return column is a factor
#Version 0.2 November 11, 2013
#Require first column of table to be numeric if range lookup is being done
#Change defaults to larger=FALSE
#Julin Maloof
vlookup <- function(ref, #the value or values that you want to look for
table, #the table where you want to look for it; will look in first column
column, #the column that you want the return data to come from,
range=FALSE, #if there is not an exact match, return the closest?
larger=FALSE) #if doing a range lookup, should the smaller or larger key be used?)
{
if(!is.numeric(column) & !column %in% colnames(table)) {
stop(paste("can't find column",column,"in table"))
}
if(range) {
if(!is.numeric(table[,1])) {
stop(paste("The first column of table must be numeric when using range lookup"))
}
table <- table[order(table[,1]),]
index <- findInterval(ref,table[,1])
if(larger) {
index <- ifelse(ref %in% table[,1],index,index+1)
}
output <- table[index,column]
output[!index <= dim(table)[1]] <- NA
} else {
output <- table[match(ref,table[,1]),column]
output[!ref %in% table[,1]] <- NA #not needed?
}
dim(output) <- dim(ref)
output
}
```
# data formating
```{r}
# setup working directory
if(Sys.info()["user"]=="nozue") {
homedir<-"/Volumes/data_work/Data6/data_JM4/2016pif3gi2_timelapse"
} # customize homedir
setwd(homedir)
#
# reading file time stamp
## This file was created in MSDOS (Celia cam computer) by "dir" command
filetime<-read.table("2016pifgi2filetime2s.txt",skip=8)#,nrows=869)
head(filetime)
tail(filetime)
# reading file name used in exp1
exp4.filename<-read.table("exp4.file.txt",skip=1) # 540 files
# add 336 to 1 (frmae number in exp1 avi )
exp4.filename$frame<-1:nrow(exp4.filename)
# reading file name used in exp5
exp5.filename<-read.table("exp5.file.txt",nrow=659) # skip the last line "exp5.file.txt"
#exp2.filename$frame<-1:nrow(exp2.filename) # is this true? no
exp5.filename.number<-gsub("(exp4000)([[:digit:]]+)(.jpg)","\\2",exp5.filename[,1])
exp5.filename$frame<- as.integer(exp5.filename.number) - min(as.integer(exp5.filename.number))+1 # more general formula
# reading file name used in exp6
exp6.filename<-read.table("exp6.file.txt",nrow=1061) # skip the last line "exp6.file.txt"
exp6.filename.number<-gsub("(exp5000)([[:digit:]]+)(.jpg)","\\2",exp6.filename[,1])
exp6.filename$frame<- as.integer(exp6.filename.number) - min(as.integer(exp6.filename.number))+1 #
# reading hypocotyl length data
## part1
hyp.data<-read.csv("Marian_gi2pif3_hyp_data - 2nd sets.csv")
# need to clean up data (no duplicate)
hyp.data$id<-paste(sub("(2016pifgi2exp)(1a_|1b_|2a_|2b_|3a_|3b_)(Col|gi2|pif4pif5|gi2pif4|gi2pif5|gi2pif4pif5)(.avi:)([[:digit:]]+)([[:print:]]+)","\\1\\2\\3\\4\\5",hyp.data$label),hyp.data$rep,sep="_")
hyp.data[duplicated(hyp.data$id),] # should be none. If duplicated, remove duplicated data from Google Sheet
# part2 (data exceeded limit of Google sheet)
hyp.data.part2<-read.csv("Marian_gi2pif3_hyp_data - 2nd sets_part2.csv")
# need to clean up data (no duplicate)
hyp.data.part2$id<-paste(sub("(2016pifgi2exp)(1a_|1b_|2a_|2b_|3a_|3b_)(Col|gi2|pif4pif5|gi2pif4|gi2pif5|gi2pif4pif5)(.avi:)([[:digit:]]+)([[:print:]]+)","\\1\\2\\3\\4\\5",hyp.data.part2$label),hyp.data.part2$rep,sep="_")
hyp.data.part2[duplicated(hyp.data.part2$id),] # should be none. If duplicated, remove duplicated data from Google Sheet
####### data cleaning on going #########
# combine part1 and part2
hyp.data<-rbind(hyp.data,hyp.data.part2)
ftable(hyp.data[,c("exp","plate","genotype","rep")],col.vars=c("plate","rep"),row.vars=c("genotype","exp"))
# change pixel into mm
scale<- 95/1220.3213 # 95 mm is 1220.3213 pixel
hyp.data$mm<-hyp.data$pixel * scale
# merge time stamp data and hyp.data
## extract avi movie frame name
hyp.data$frame<-as.integer(sub("(2016pifgi2exp)(1a_|1b_|2a_|2b_|3a_|3b_)(Col|gi2|pif4pif5|gi2pif4|gi2pif5|gi2pif4pif5)(.avi:)([[:digit:]]+)([[:print:]]+)","\\5",hyp.data$label))
# obtaining file date/time (exp4)
hyp.data[hyp.data$exp==4,"file"]<-as.character(vlookup(hyp.data[hyp.data$exp==4,"frame"],exp4.filename[,2:1],2))
str(hyp.data)
# obtaining file date/time (exp5)
hyp.data[hyp.data$exp==5,"file"]<-as.character(vlookup(hyp.data[hyp.data$exp==5,"frame"],exp5.filename[,2:1],2))
# obtaining file date/time (exp6)
hyp.data[hyp.data$exp==6,"file"]<-as.character(vlookup(hyp.data[hyp.data$exp==6,"frame"],exp6.filename[,2:1],2))
## obtain file date/time stamp (exp4,exp5 and exp6)
hyp.data$date<-vlookup(hyp.data$file,filetime[,c("V4","V1")],2)
hyp.data$time<-vlookup(hyp.data$file,filetime[,c("V4","V2")],2)
## converted to "POSIXlt" (see ?strptime)
hyp.data$date.time<-strptime(paste(hyp.data$date,hyp.data$time,sep="/"),"%m/%d/%Y/%H:%M")
#
summary(hyp.data)
## day1
exp4.start.date.time<-strptime("11/7/2016/8:00","%m/%d/%Y/%H:%M") # start date time for exp4
exp5.start.date.time<-strptime("11/17/2016/8:00","%m/%d/%Y/%H:%M") # start date time for exp5
exp6.start.date.time<-strptime("12/14/2016/8:00","%m/%d/%Y/%H:%M") # start date time for exp6
## day-night cycle (7:00 - 15:00 is day). No. 8:00 - 16:00 was day (after correction of daylight saving on Nov 1)
# calculate hours after stratification
# cf. hyp.data$date.time - start.date.time gave me units="days"
# for exp4
hyp.data[hyp.data$exp==4,"after.stratification_hr"]<-
as.numeric(gsub(" hours","",difftime(hyp.data[hyp.data$exp==4,"date.time"],exp4.start.date.time,units="hours"))) # see more in ?difftime
# for exp5
hyp.data[hyp.data$exp==5,"after.stratification_hr"]<-
as.numeric(gsub(" hours","",difftime(hyp.data[hyp.data$exp==5,"date.time"],exp5.start.date.time,units="hours"))) # see more in ?difftime
#
# for exp6
hyp.data[hyp.data$exp==6,"after.stratification_hr"]<-
as.numeric(gsub(" hours","",difftime(hyp.data[hyp.data$exp==6,"date.time"],exp6.start.date.time,units="hours"))) # see more in ?difftime
str(hyp.data)
head(hyp.data)
summary(hyp.data)
```
# calculate growth rate. how to ? exp1 and exp2 has different. And then smoothing curve as Julind did in Holtan (2011) Plant Physiol?
```{r}
# from Julin's scripts "TLsmoothMND MAR.R"
calcRate <- function(x) { #no running average
x <- x[order(x$after.stratification_hr),]
x$rate <- NA
for (i in 2:nrow(x)) {
x$rate[i] <- (x$y[i]-x$y[i-1]) / (x$after.stratification_hr[i] -x$after.stratification_hr[i-1])
}
x
}
# under construction
#names(hyp.data)<-sub("after.stratification_hr","time",names(hyp.data))
names(hyp.data)<-sub("mm","y",names(hyp.data))
hyp.data$sample<-with(hyp.data,paste(exp,plate,genotype,rep,sep="_"))
rate.list <- by(hyp.data,hyp.data$sample,calcRate)
tl.rates <- rate.list[[1]] #need to recompile the list of dataframes into one dataframe. this will hold it
for (i in 2:length(rate.list)) tl.rates <- rbind(tl.rates,rate.list[[i]]) #merge into one frame
# tl.rates2 <- tl.rates[complete.cases(tl.rates),] #remove rows with no data.(from J). Error in complete.cases(tl.rates) : not all arguments have the same length (Kazu)
tl.rates3<-tl.rates[!is.na(tl.rates$rate),]
#
tl.rates3.Col<-tl.rates3[tl.rates3$genotype=="Col",]
#tl.rates3.Col[tl.rates3.Col$rate< -2,c("exp","plate","genotype","rep",)] # why???
```
# plot data
```{r,fig.height=11,fig.width=20}
# plot (height version)
p.height<-ggplot(hyp.data,aes(x=after.stratification_hr,y=y,color=as.factor(rep),shape=factor(exp))) + geom_point()+facet_grid(genotype~exp)
p.height
# plot (growth rate version)
night <- seq(8,max(tl.rates$after.stratification_hr),24)
day <- seq(24,max(tl.rates$after.stratification_hr),24)
if (length(night)>length(day)) day <- c(day,max(tl.rates$after.stratification_hr))
bottom <- rep(-0.02,length(night))
top <- rep(0.11,length(night)) # for mm
#top <- rep(1,length(night)) # for pixel
#data frame to hold coordinates for nighttime rectangles
rectangles <- data.frame(night,day,top,bottom,rate=0,after.stratification_hr=0)
rm(night,day,top,bottom)
# remove data not this time range, which did not solve an error in geom_smooth()
# tl.rates4<-tl.rates3[tl.rates3$after.stratification_hr>24*5|tl.rates3$after.stratification_hr<24*12,]
#
#p.growthrate<- ggplot(data=tl.rates,aes(y=rate,x=after.stratification_hr)) + facet_wrap(~genotype,ncol=1) + theme_classic()
p.growthrate<- ggplot(data=tl.rates3,aes(y=rate,x=after.stratification_hr)) + facet_grid(genotype~.)
p.growthrate <- p.growthrate + geom_rect(data=rectangles,mapping=aes(xmin=night,xmax=day,ymin=bottom,ymax=top),fill="grey80")
#p.growthrate <- p.growthrate + geom_smooth(stat="smooth",fill="grey20",method="loess",span=.1,n=200) # original
p.growthrate <- p.growthrate + geom_smooth(stat="smooth",fill="grey20",method="loess",span=.05,n=200) # needs to play with span and n
#p.growthrate <- p.growthrate + scale_x_continuous(limits=c(24*5,24*12))
p.growthrate <- p.growthrate + theme(
axis.text.x=element_text(size=18),
#axis.ticks = element_blank(),
panel.background = element_rect(fill = "white")) +
labs(x="Time (h)",y="growth rate (mm/hr)")
p.growthrate <- p.growthrate + coord_cartesian(xlim=c(24*1.5,24*9),ylim=c(-0.001,0.15))
p.growthrate
ggsave(p.growthrate,file="Marian_pifgi3_timelapse_exp2_v1.png",dpi=300,width=11,height=8) # needs to clean up graph
# Time (h): time after stratification (hr) (cf. Nozue (2007) use "zero indicates dawn of the fourth day")
#
```