-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAnalyze RNAseq for Snps.R
69 lines (46 loc) · 2.45 KB
/
Analyze RNAseq for Snps.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#Looking for interesting SNPs in RNAseq data
#Julin Maloof
#December 2, 2013
data <- read.delim("anotated_M82_tie1.varscanNEW.vcf",comment.char="#",sep="\t",header=F,as.is=T)
head(data)
colnames(data) <- unlist(strsplit("CHROM POS ID REF ALT QUAL FILTER INFO FORMAT M82 tie1",split=" +"))
head(data,20)
data$tie1 <- sub("(\\./\\.:\\.:[0-9]+$)","\\1::::::::::::",data$tie1)
tie1 <- as.data.frame(matrix(unlist(strsplit(data[,"tie1"],split=":")),ncol=14,byrow=T))
head(tie1)
colnames(tie1) <- paste("tie1",c("genotype","genotypeQuality","raw","cov",
"ref","alt","percent","pval","r.qual","alt.qual",
"ref.for","ref.rev","alt.for","alt.rev"),sep=".")
head(tie1)
tie1$tie1.percent <- sub("%","",tie1$tie1.percent)
tie1[,-1] <- apply(tie1[,-1],2,as.numeric)
data$M82 <- sub("(\\./\\.:\\.:[0-9]+$)","\\1::::::::::::",data$M82)
M82 <- as.data.frame(matrix(unlist(strsplit(data[,"M82"],split=":")),ncol=14,byrow=T))
head(M82)
colnames(M82) <- paste("M82",c("genotype","genotypeQuality","raw","cov",
"ref","alt","percent","pval","r.qual","alt.qual",
"ref.for","ref.rev","alt.for","alt.rev"),sep=".")
head(M82)
M82$M82.percent <- sub("%","",M82$M82.percent)
M82[,-1] <- apply(M82[,-1],2,as.numeric)
summary(M82)
data.large <- cbind(data,tie1[,1:8],M82[,1:8])
data.small <- data.large[data.large$M82.genotype!="./." & data.large$tie1.genotype!="./.",]
head(data.small,30)
summary(data.small)
data.small <- data.small[as.character(data.small$M82.genotype) != as.character(data.small$tie1.genotype),]
head(data.small,30)
data.small <- data.small[data.small$tie1.genotype=="1/1",]
data.small <- data.small[data.small$tie1.percent>90,]
data.small <- data.small[grep("EFF",data.small$INFO),]
data.small <- data.small[!grepl("=SYNONYMOUS_CODING",data.small$INFO),]
data.small <- data.small[data.small$M82.percent < 20 | data.small$M82.percent > 80,] # limit to nonsegregating M82 sites
head(data.small)
data.small$ITAG <- regmatches(data.small$INFO,regexpr("Solyc[0-9]{2}g[0-9]{6}",data.small$INFO))
head(data.small)
HRD <- read.delim("/Users/jmaloof/Documents/Lab Notebook support/2011/Tomato Annotation and GO/ITAG2.3 annotation/ITAG2.3_HRD.tsv",
sep="\t")
head(HRD)
HRD$ITAG <- substr(HRD$ITAG,1,14)
data.small.annotated <- merge(data.small,HRD,by="ITAG",all.x=T)
write.csv(data.small.annotated,"tie1Candidates_RNAseq.csv")