Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mattbranch #28

Open
wants to merge 35 commits into
base: devel
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
845a392
add locus name and signif_level options
mconomos Apr 8, 2020
841819e
put track_file back in
mconomos Apr 8, 2020
61a9450
convert signif_level to numeric
mconomos Apr 8, 2020
65f7473
add option to adjust number of gene rows in locus zoom plots
mconomos Apr 30, 2020
f3d4ebd
attempting to get command syntax correct
mconomos Apr 30, 2020
7189db4
wip
mconomos Apr 30, 2020
735c7c2
attempt to add ability to label rsID in locus zoom
mconomos Apr 30, 2020
77e6dea
make locuszoom title optional
mconomos Apr 30, 2020
aff2558
wip
mconomos Apr 30, 2020
b78fa10
wip
mconomos Apr 30, 2020
79161de
wip
mconomos Apr 30, 2020
0520886
wip
mconomos Apr 30, 2020
8a72b52
wip
mconomos Apr 30, 2020
2a35331
wip
mconomos Apr 30, 2020
c5385ec
wip
mconomos Apr 30, 2020
a9a2583
wip
mconomos Apr 30, 2020
ff187d6
add option to specify the variant and region for plotting
mconomos May 6, 2020
8acdab5
wip
mconomos May 7, 2020
7e65d78
match a couple aesthetics to Oasis
mconomos May 7, 2020
bf12997
add marker file option
mconomos May 7, 2020
3d189b5
add ref snp line
mconomos May 7, 2020
8f5d462
updates to ref snp plotting
mconomos May 7, 2020
6ad4e33
wip
mconomos May 7, 2020
0df596f
wip
mconomos May 8, 2020
6ecabd4
wip
mconomos May 8, 2020
93e5f46
wip
mconomos May 8, 2020
340d8e5
wip
mconomos May 8, 2020
cf4310c
wip
mconomos May 8, 2020
c90cf08
wip
mconomos May 8, 2020
a354f71
wip
mconomos May 8, 2020
3711014
wip
mconomos May 8, 2020
32661d8
wip
mconomos May 8, 2020
8dfe6f2
wip
mconomos May 8, 2020
66ca30b
update calculateLD to avoid long vector issue; allows for larger data…
mconomos May 8, 2020
8e039af
cleanup; add option for NA rsid in locus_file
mconomos May 8, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 77 additions & 20 deletions R/locuszoom.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@ optional <- c("flanking_region"=500,
"track_file"=NA,
"track_file_type"="window",
"track_label"="",
"track_threshold"=5e-8)
"track_threshold"=5e-8,
"signif_level"=5e-8,
"gene_rows"=4,
"title"=TRUE,
"variant_label_file"=NA,
"variant_line_color"="transparent")
config <- setConfigDefaults(config, required, optional)
print(config)

Expand All @@ -47,28 +52,44 @@ assoc <- getobj(assocfile)
if (config["locus_type"] == "variant") {
stopifnot("variant.id" %in% names(locus))
variant <- locus$variant.id
flank <- as.numeric(config["flanking_region"]) * 1000
var.pos <- assoc$pos[assoc$variant.id == variant]
start <- var.pos - flank
if (start < 1) start <- 1
end <- var.pos + flank

lz.name <- paste0("chr", var.chr, ":", var.pos)
ld.region <- paste0("--refsnp \"", lz.name, "\"", " --flank ", config["flanking_region"], "kb")
prefix <- paste0(config["out_prefix"], "_var", variant, "_ld_", pop)

if("rsid" %in% names(locus) && !is.na(locus$rsid)){
lz.name <- locus$rsid
}else{
lz.name <- paste0("chr", var.chr, ":", var.pos)
}

if(all(c("variant.id", "start", "end") %in% names(locus))){
start <- locus$start
end <- locus$end
ld.region <- paste("--refsnp", lz.name, "--chr", var.chr, "--start", start, "--end", end)
}else{
flank <- as.numeric(config["flanking_region"]) * 1000
start <- var.pos - flank
if (start < 1) start <- 1
end <- var.pos + flank
ld.region <- paste0("--refsnp \"", lz.name, "\"", " --flank ", config["flanking_region"], "kb")
}

freq <- assoc$freq[assoc$variant.id == variant]
maf <- min(freq, 1-freq)
mac <- assoc$MAC[assoc$variant.id == variant]
title <- paste(lz.name, "- MAF:", formatC(maf, digits=3), "- MAC:", mac)

prefix <- paste0(config["out_prefix"], "_var", variant, "_ld_", pop)

} else if (config["locus_type"] == "region") {
stopifnot(all(c("start", "end") %in% names(locus)))
start <- locus$start
end <- locus$end

ld.region <- paste("--chr", var.chr, "--start", start, "--end", end)

prefix <- paste0(config["out_prefix"], "_ld_", pop)
title <- ""
}

if("locus_name" %in% names(locus)){
prefix <- paste(prefix, locus$locus_name, sep = "_")
}

## construct METAL-format file
Expand All @@ -77,20 +98,18 @@ assoc <- assoc %>%
select(variant.id, chr, pos, ends_with("pval"))
names(assoc)[4] <- "pval"

##### NOTE: i added the following two lines of code #####
## remove duplicate chr:pos rows, removing the variant with the less significant (higher) pvalue
assoc <- assoc[order(assoc$pval,decreasing=FALSE),]
assoc <- assoc[!duplicated(assoc$pos),]
assoc <- assoc[order(assoc$pos),]
#####

assoc.filename <- tempfile()
writeMETAL(assoc, file=assoc.filename)


# LD
if (pop != "TOPMED") {
ld.cmd <- paste("--pop", pop, "--source 1000G_Nov2014")
ld.title <- paste("LD: 1000G", pop)
} else {
if (!is.na(config["ld_sample_include"])) {
sample.id <- getobj(config["ld_sample_include"])
Expand All @@ -108,13 +127,16 @@ if (pop != "TOPMED") {
}
gdsfile <- insertChromString(config["gds_file"], var.chr)
ld <- calculateLD(gdsfile, variant.id=assoc$variant.id, ref.var=ref.var, sample.id=sample.id)

ld.filename <- tempfile()
writeLD(assoc, ld, ref.var, file=ld.filename)
if("rsid" %in% names(locus) && !is.na(locus$rsid)){
writeLD(assoc, ld, ref.var, file=ld.filename, rsid = locus$rsid)
}else{
writeLD(assoc, ld, ref.var, file=ld.filename, rsid = NULL)
}

ld.cmd <- paste("--ld", ld.filename)
ld.title <- "LD: TOPMed"
}
title <- if (title == "") ld.title else paste(ld.title, title, sep=" - ")

## construct BED track file
if (!is.na(config["track_file"])) {
Expand All @@ -128,6 +150,28 @@ if (!is.na(config["track_file"])) {
track.cmd <- ""
}


## Plot Title
if(config["title"]){
if(config["locus_type"] == "variant"){
title <- ifelse("locus_name" %in% names(locus), locus$locus_name, lz.name)
maf.title <- paste("- MAF:", formatC(maf, digits=3), "- MAC:", mac)
}else if(config["locus_type"] == "region"){
title <- ifelse("locus_name" %in% names(locus), locus$locus_name, "")
maf.title <- ""
}
if(pop != "TOPMED"){
ld.title <- paste("- LD: 1000G", pop)
}else{
ld.title <- "- LD: TOPMed"
}
title <- paste(title, ld.title, maf.title)
}else{
title <- NULL
}



command <- paste("locuszoom",
"theme=publication",
"--cache None",
Expand All @@ -141,9 +185,22 @@ command <- paste("locuszoom",
ld.cmd,
ld.region,
"--prefix ", prefix,
paste0("title=\"", title, "\""),
paste0("signifLine=\"", -log10(5e-8), "\" signifLineColor=\"gray\" signifLineWidth=\"2\""),
"ylab=\"-log10(p-value) from single variant test\"")
paste0("signifLine=\"", -log10(as.numeric(config["signif_level"])), "\""),
"signifLineColor=\"gray\"",
"signifLineWidth=\"2\"",
"ylab=\"-log10(p-value)\"",
paste0("refsnpLineColor=\"", config["variant_line_color"], "\""),
"refsnpLineWidth=\"2\"",
"refsnpTextSize=\"1\"",
paste0("rfrows=\"", as.numeric(config["gene_rows"]), "\""))

if(!is.null(title)){
command <- paste(command, paste0("title=\"", title, "\""))
}

if(!is.na(config["variant_label_file"])){
command <- paste(command, "--denote-markers-file", config["variant_label_file"])
}

cat(paste(command, "\n"))
system(command)
Expand Down
27 changes: 23 additions & 4 deletions TopmedPipeline/R/locuszoom.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,20 @@ calculateLD <- function(gdsfile, variant.id, ref.var=NULL, sample.id=NULL) {
geno.ref <- geno[,as.character(ref.var)]
}

# drop unnecessary dimensions (if ref.var is a single variant)
r <- drop(cor(geno, geno.ref, use="pairwise.complete.obs"))
# check to avoid long vectors
nr <- as.numeric(nrow(geno))
nc <- as.numeric(ncol(geno))
nblock <- ceiling(nr*nc/2^31)
if(nblock > 1){
# break into blocks to compute correlation if needed
blocks <- unname(split(1:nc, cut(1:nc, nblock)))
r <- unlist(lapply(blocks, function(b) {
drop(cor(geno[,b], geno.ref, use="pairwise.complete.obs"))
}))
}else{
# drop unnecessary dimensions (if ref.var is a single variant)
r <- drop(cor(geno, geno.ref, use="pairwise.complete.obs"))
}
r^2
}

Expand All @@ -78,10 +90,17 @@ calculateLD <- function(gdsfile, variant.id, ref.var=NULL, sample.id=NULL) {
#'
#' @importFrom dplyr "%>%" mutate_ select_
#' @export
writeLD <- function(x, ld, ref.var, file) {
writeLD <- function(x, ld, ref.var, file, rsid = NULL) {
x <- x %>%
mutate_(snp1=~paste0("chr", chr, ":", pos))
x$snp2 <- x$snp1[x$variant.id == ref.var]

if(!is.null(rsid)){
x$snp1[x$variant.id == ref.var] <- rsid
x$snp2 <- rsid
}else{
x$snp2 <- x$snp1[x$variant.id == ref.var]
}

x$dprime <- 0
x$rsquare <- ld
x <- x %>%
Expand Down