Skip to content

Commit

Permalink
49 spacemarkers nextflow module (#50)
Browse files Browse the repository at this point in the history
* init add nextflow module

* replace to nextflow dir for clarity
  • Loading branch information
dimalvovs authored Nov 22, 2024
1 parent ac95ee8 commit 573e925
Show file tree
Hide file tree
Showing 2 changed files with 245 additions and 0 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ Dockerfile
.gitignore
.github
.dockerignore
nextflow
/vignettes/test-args.Rmd
244 changes: 244 additions & 0 deletions nextflow/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
process SPACEMARKERS {
tag "$meta.id"
label 'process_high_memory'
container 'ghcr.io/deshpandelab/spacemarkers:main'

input:
tuple val(meta), path(features), path(data)
output:
tuple val(meta), path("${prefix}/spPatterns.rds"), emit: spPatterns
tuple val(meta), path("${prefix}/optParams.rds"), emit: optParams
tuple val(meta), path("${prefix}/spaceMarkers.rds"), emit: spaceMarkers
path "versions.yml", emit: versions

stub:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
"""
mkdir "${prefix}"
touch "${prefix}/spPatterns.rds"
touch "${prefix}/optParams.rds"
touch "${prefix}/spaceMarkers.rds"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
SpaceMarkers: \$(Rscript -e 'print(packageVersion("SpaceMarkers"))' | awk '{print \$2}')
R: \$(Rscript -e 'print(packageVersion("base"))' | awk '{print \$2}')
END_VERSIONS
"""
script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
"""
#!/usr/bin/env Rscript
dir.create("${prefix}", showWarnings = FALSE)
library("SpaceMarkers")
#load spatial coords from tissue positions, deconvolved patterns, and expression
coords <- load10XCoords("$data")
features <- getSpatialFeatures("$features")
dataMatrix <- load10XExpr("$data")
#add spatial coordinates to deconvolved data, only use barcodes present in data
spPatterns <- merge(coords, features, by.x = "barcode", by.y = "row.names")
spPatterns <- spPatterns[which(spPatterns[,"barcode"] %in% colnames(dataMatrix)),]
saveRDS(spPatterns, file = "${prefix}/spPatterns.rds")
#remove genes with low expression, only barcodes present in spatial data
keepGenes <- which(apply(dataMatrix, 1, sum) > 10)
keepBarcodes <- which(colnames(dataMatrix) %in% spPatterns[,"barcode"])
dataMatrix <- dataMatrix[keepGenes, keepBarcodes]
#compute optimal parameters for spatial patterns
optParams <- getSpatialParameters(spPatterns);
saveRDS(optParams, file = "${prefix}/optParams.rds")
#find genes that are differentially expressed in spatial patterns
spaceMarkers <- getPairwiseInteractingGenes(data = dataMatrix,
optParams = optParams,
spPatterns = spPatterns,
mode = "DE",
analysis="enrichment",
workers=$task.cpus)
saveRDS(spaceMarkers, file = "${prefix}/spaceMarkers.rds")
# Get the versions of the packages
spaceMarkersVersion <- packageVersion("SpaceMarkers")
rVersion <- packageVersion("base")
cat(sprintf('"%s":\n SpaceMarkers: %s\n R: %s\n',
"${task.process}", spaceMarkersVersion, rVersion),
file = "versions.yml")
"""
}

process SPACEMARKERS_MQC {
tag "$meta.id"
label 'process_low'
container 'ghcr.io/deshpandelab/spacemarkers:main'

input:
tuple val(meta), path(spaceMarkers)
output:
tuple val(meta), path("${prefix}/spacemarkers_mqc.json"), emit: spacemarkers_mqc
path "versions.yml", emit: versions

script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
"""
#!/usr/bin/env Rscript
dir.create("${prefix}", showWarnings = FALSE)
#[['']] notation needed to allow nextflow var susbtitution
#init all report variables
n_pairs_total <- NA
n_pairs_interact <- NA
min_spacemarker_metric <- NA
max_spacemarker_metric <- NA
min_genes <- NA
max_genes <- NA
avg_hotspot_area <- NA
sm <- readRDS("$spaceMarkers")
smi <- sm[which(sapply(sm, function(x) length(x[['interacting_genes']]))>0)]
#interacting patterns stats
n_pairs_total <- length(sm)
n_pairs_interact <- length(smi)
if(n_pairs_interact >0 ) {
#spacemarker metric
max_spacemarker_metric <- max(sapply(smi, function(x) {
max(x[['interacting_genes']][[1]][['SpaceMarkersMetric']])
}))
min_spacemarker_metric <- min(sapply(smi, function(x) {
min(x[['interacting_genes']][[1]][['SpaceMarkersMetric']])
}))
#average number of genes in each pair
min_genes <- min(sapply(smi, function(x) {
nrow(x[['interacting_genes']][[1]])
}))
#average number of genes in each pair
max_genes <- max(sapply(smi, function(x) {
nrow(x[['interacting_genes']][[1]])
}))
#average percent overlap across interacting patterns
avg_hotspot_area <- mean(sapply(smi, function(x) {
sum(!is.na(x[['hotspots']]))/length(x[['hotspots']][,1])
}))
}
#report
report_data <- list(
"${prefix}" = list(
'Pairs Total' = n_pairs_total,
'Pairs Interact' = n_pairs_interact,
'SpaceMarker Metric' = sprintf('%0.1f - %0.1f', min_spacemarker_metric, max_spacemarker_metric),
'Gene Count' = sprintf('%0.f - %0.f', min_genes, max_genes),
'Mean Hotspot Area' = avg_hotspot_area
)
)
report <- list(
id = "spacemarkers_mqc",
section_name = "SpaceMarkers",
description = "Tool to identify genes associated with latent space interactions in spatial transcriptomics.",
plot_type = "table",
pconfig = list(
id = "custom_data_table",
title = "SpacemMarkers Stats"
),
data = report_data
)
jsonlite::write_json(
x=report,
path = "${prefix}/spacemarkers_mqc.json",
auto_unbox = TRUE,
pretty = TRUE)
# Get the versions of the packages
spaceMarkersVersion <- packageVersion("SpaceMarkers")
rVersion <- packageVersion("base")
cat(sprintf('"%s":\n SpaceMarkers: %s\n R: %s\n',
"${task.process}", spaceMarkersVersion, rVersion),
file = "versions.yml")
"""

stub:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
"""
mkdir "${prefix}"
touch "${prefix}/spacemarkers_mqc.json"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
SpaceMarkers: \$(Rscript -e 'print(packageVersion("SpaceMarkers"))' | awk '{print \$2}')
R: \$(Rscript -e 'print(packageVersion("base"))' | awk '{print \$2}')
END_VERSIONS
"""
}

process SPACEMARKERS_IMSCORES {
tag "$meta.id"
label 'process_low'
container 'ghcr.io/deshpandelab/spacemarkers:main'

input:
tuple val(meta), path(spaceMarkers)
output:
tuple val(meta), path("${prefix}/imscores.csv"), emit: spacemarkers_imscores
path "versions.yml", emit: versions

script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
"""
#!/usr/bin/env Rscript
dir.create("${prefix}", showWarnings = FALSE)
sm <- readRDS("$spaceMarkers")
smi <- sm[which(sapply(sm, function(x) length(x[['interacting_genes']]))>0)]
fields <- c('Gene', 'SpaceMarkersMetric')
imscores <- lapply(seq_along(smi), function(x) {
df <- smi[[x]][['interacting_genes']][[1]][,fields]
#rename to metric to its parent item name
setNames(df, c('Gene', names(smi)[x]))
})
imscores <- Reduce(function(x, y) {
merge(x, y, by="Gene", all=TRUE)
}, x=imscores, right=FALSE)
if(is.null(imscores)) {
imscores <- data.frame(Gene=character(0))
}
write.csv(imscores, file = "${prefix}/imscores.csv", row.names = FALSE)
# Get the versions of the packages
spaceMarkersVersion <- packageVersion("SpaceMarkers")
rVersion <- packageVersion("base")
cat(sprintf('"%s":\n SpaceMarkers: %s\n R: %s\n',
"${task.process}", spaceMarkersVersion, rVersion),
file = "versions.yml")
"""

stub:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
"""
mkdir "${prefix}"
touch "${prefix}/imscores.csv"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
SpaceMarkers: \$(Rscript -e 'print(packageVersion("SpaceMarkers"))' | awk '{print \$2}')
R: \$(Rscript -e 'print(packageVersion("base"))' | awk '{print \$2}')
END_VERSIONS
"""
}

0 comments on commit 573e925

Please sign in to comment.