From 1b133f1fef383f4054074d936f8fb7c35a480331 Mon Sep 17 00:00:00 2001 From: orian116 Date: Mon, 14 Oct 2024 14:20:30 -0400 Subject: [PATCH 1/6] Updated getSpaceMarkersMetric to Include AND in Zsign instead of OR and transformed the SpaceMarkersMetric with log2(abs()+1) to prevent skewing of the scale. Updated Package version --- R/getInteractingGenes.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/getInteractingGenes.R b/R/getInteractingGenes.R index d22aed3..711cc88 100644 --- a/R/getInteractingGenes.R +++ b/R/getInteractingGenes.R @@ -83,12 +83,12 @@ getSpaceMarkersMetric <- function(interacting.genes){ for (i in seq(1,length(interacting_genes))) { if (all(dim(interacting_genes[[i]])>1)) { - Zsign <- (2*(-1+((interacting_genes[[i]]$Dunn.zP1_Int<0)| + Zsign <- (2*(-1+((interacting_genes[[i]]$Dunn.zP1_Int<0)& (interacting_genes[[i]]$Dunn.zP2_Int<0))*1)+1) - Zmag <- (interacting_genes[[i]]$Dunn.zP1_Int)* + Zmag <- abs((interacting_genes[[i]]$Dunn.zP1_Int)* (interacting_genes[[i]]$Dunn.zP2_Int)/ - (pmax(abs(interacting_genes[[i]]$Dunn.zP2_P1),1)) - interacting_genes[[i]]$SpaceMarkersMetric <- Zsign*Zmag + (pmax(abs(interacting_genes[[i]]$Dunn.zP2_P1),1))) + interacting_genes[[i]]$SpaceMarkersMetric <- Zsign*log2(Zmag+1) od <- order( interacting_genes[[i]]$SpaceMarkersMetric,decreasing=TRUE) interacting_genes[[i]] <- interacting_genes[[i]][od,] From 5a4d3efb8b6cfadb59517156281f437e939581ce Mon Sep 17 00:00:00 2001 From: orian116 Date: Mon, 14 Oct 2024 14:47:36 -0400 Subject: [PATCH 2/6] Update to find_pattern_hotspots for includeSelf=TRUE. This mode considers the spatial pattern itself in calculating influence instead of only considering the spots around it. Updated default kernel threshold to be slightly more stringent since more hotspots will be produced. Updated Package version --- R/getInteractingGenes.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/getInteractingGenes.R b/R/getInteractingGenes.R index 711cc88..fb1ac41 100644 --- a/R/getInteractingGenes.R +++ b/R/getInteractingGenes.R @@ -4,10 +4,10 @@ find_pattern_hotspots <- function( spPatterns, params = NULL, patternName = "Pattern_1", outlier = "positive", - nullSamples = 100,...){ + nullSamples = 100, includeSelf = TRUE,...){ if (is.null(params)){ sigmaPair <- 10 - kernelthreshold <- 2 + kernelthreshold <- 3 } else { sigmaPair <- params["sigmaOpt"] kernelthreshold <- params["threshOpt"] @@ -21,6 +21,11 @@ find_pattern_hotspots <- function( x=spPatterns$x,y = spPatterns$y, window = allwin,marks = patternVector) Kact1 <- spatstat.explore::Smooth( X, at = "points", sigma = sigmaPair[1], ...) + if (includeSelf == TRUE){ + Kact1 <- spPatterns[,patternName] + Kact1 + } else { + Kact1 <- Kact1 + } Karr1 <- vapply(seq(1,nullSamples),function(i){ Xr<-X; spatstat.geom::marks(Xr) <- sample(spatstat.geom::marks(X)); From 77015d22fa3102196018626d4cb75c50a9a1928b Mon Sep 17 00:00:00 2001 From: orian116 Date: Mon, 14 Oct 2024 14:57:16 -0400 Subject: [PATCH 3/6] Added a getSpatialParametersExternal function to allow the user to specify sigma for kernel width for smoothing and outlier threshold for hotspots. The default behavior is to pull the spot diameter from a 10xVisium .json file from the usual spatial folder created by Visium. --- NAMESPACE | 1 + R/getSpatialParameters.R | 64 +++++++++++++++++++++++++++++ man/getSpatialParametersExternal.Rd | 60 +++++++++++++++++++++++++++ 3 files changed, 125 insertions(+) create mode 100644 man/getSpatialParametersExternal.Rd diff --git a/NAMESPACE b/NAMESPACE index a73c9b5..5485fba 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export(getInteractingGenes) export(getPairwiseInteractingGenes) export(getSpatialFeatures) export(getSpatialParameters) +export(getSpatialParametersExternal) export(load10XCoords) export(load10XExpr) importFrom(Matrix,sparseMatrix) diff --git a/R/getSpatialParameters.R b/R/getSpatialParameters.R index 2331260..f8989b6 100644 --- a/R/getSpatialParameters.R +++ b/R/getSpatialParameters.R @@ -121,3 +121,67 @@ getSpatialParameters <- function(spatialPatterns,...){ return(optParams) } +#=================== +#' getSpatialParametersExternal +#' Manually obtain Optimal Parameters for Interacting cells +#' +#' This function calculates obtains a specified width of a distribution +#' (sigmaOpt) as well as the outlier threshold around the set of spots +#' (thresOpt) for each pattern from a latent feature space. +#' +#' @export +#' +#' @param spatialPatterns A data frame that contains the spatial coordinates +#' for each cell type. The column names must include 'x' and 'y' as well as a +#' set of numbered columns named 'Pattern_1.....N'. +#' @param visiumDir A string path specifying the location of the 10xVisium +#' directory +#' @param spatial A string path specifying the location of the spatial folder +#' containing the .json file of the spot characteristics +#' @param pattern A string specifying the name of the .json file +#' @param spotDiameter A numeric value specifying your desired sigma +#' @param threshold A numeric value specifying your hotspot threshold +#' @return a numeric matrix of sigmaOpts - the optimal width of the gaussian +#' distribution, and the thresOpt - outlier threshold around the set of spots +#' for each pattern +#' @examples +#' library(SpaceMarkers) +#' # Create test data +#' cells <- c() +#' test_num <- 500 +#' for(i in 1:test_num){ +#' cells[length(cells)+1] <- paste0("cell_",i) +#' } +#' spPatterns <- data.frame(barcode = cells, +#' y = runif(test_num, min=0, max=test_num), +#' x = runif(test_num, min=0, max=test_num), +#' Pattern_1 = runif(test_num, min=0, max=1), +#' Pattern_2 = runif(test_num, min=0, max=1) ) +#' # Call the getSpatialParameters function with the test data +#' optParams <- getSpatialParametersExternal(spPatterns, spotDiameter = 10) +#' + +getSpatialParametersExternal <- function(spatialPatterns,visiumDir = ".", + spatialDir ="spatial", + pattern = "scalefactors_json.json", + spotDiameter = NULL,threshold = 3) { + patternList <- setdiff(colnames(spatialPatterns),c("barcode","x","y")) + if (!is.null(spotDiameter)) { + sigmaOpt <- spotDiameter + threshOpt <- threshold + } else if (is.null(spotDiameter)) { + message("Assuming Visium folder with .json file and spot diamater exists") + scale_json <- dir(paste0(visiumDir,"/",spatialDir), + pattern = pattern,full.names = TRUE) + scale_values <- jsonlite::read_json(scale_json) + sigmaOpt <- scale_values[[4]] + threshOpt <- threshold + + } else { + stop("Please specify the spot diameter") + } + optParams <-matrix(c(sigmaOpt,threshOpt),nrow = 2,ncol = length(patternList)) + colnames(optParams) <- patternList + rownames(optParams) <- c("sigmaOpt","threshOpt") + return(optParams) +} diff --git a/man/getSpatialParametersExternal.Rd b/man/getSpatialParametersExternal.Rd new file mode 100644 index 0000000..53cbf83 --- /dev/null +++ b/man/getSpatialParametersExternal.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getSpatialParameters.R +\name{getSpatialParametersExternal} +\alias{getSpatialParametersExternal} +\title{getSpatialParametersExternal +Manually obtain Optimal Parameters for Interacting cells} +\usage{ +getSpatialParametersExternal( + spatialPatterns, + visiumDir = ".", + spatialDir = "spatial", + pattern = "scalefactors_json.json", + spotDiameter = NULL, + threshold = 3 +) +} +\arguments{ +\item{spatialPatterns}{A data frame that contains the spatial coordinates +for each cell type. The column names must include 'x' and 'y' as well as a +set of numbered columns named 'Pattern_1.....N'.} + +\item{visiumDir}{A string path specifying the location of the 10xVisium +directory} + +\item{pattern}{A string specifying the name of the .json file} + +\item{spotDiameter}{A numeric value specifying your desired sigma} + +\item{threshold}{A numeric value specifying your hotspot threshold} + +\item{spatial}{A string path specifying the location of the spatial folder +containing the .json file of the spot characteristics} +} +\value{ +a numeric matrix of sigmaOpts - the optimal width of the gaussian +distribution, and the thresOpt - outlier threshold around the set of spots +for each pattern +} +\description{ +This function calculates obtains a specified width of a distribution +(sigmaOpt) as well as the outlier threshold around the set of spots +(thresOpt) for each pattern from a latent feature space. +} +\examples{ +library(SpaceMarkers) +# Create test data +cells <- c() +test_num <- 500 +for(i in 1:test_num){ + cells[length(cells)+1] <- paste0("cell_",i) +} +spPatterns <- data.frame(barcode = cells, +y = runif(test_num, min=0, max=test_num), +x = runif(test_num, min=0, max=test_num), +Pattern_1 = runif(test_num, min=0, max=1), +Pattern_2 = runif(test_num, min=0, max=1) ) +# Call the getSpatialParameters function with the test data +optParams <- getSpatialParametersExternal(spPatterns, spotDiameter = 10) + +} From d3f1b4a8bd1c96e719382a5a62ebcb85cca32f24 Mon Sep 17 00:00:00 2001 From: orian116 Date: Mon, 14 Oct 2024 18:28:19 -0400 Subject: [PATCH 4/6] Updated load10XCoords to be able to read tissue positions and load10Expr to read expression from VisiumHD which are stored in .parquet files. Added the nanoparquet function to depends in DESCRIPTION as well as updated package version. Updated NEWS.md --- DESCRIPTION | 3 ++- NAMESPACE | 1 + NEWS.md | 9 +++++++++ R/preprocessing.R | 32 ++++++++++++++++++++++++++------ man/load10XExpr.Rd | 8 +++++++- 5 files changed, 45 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e5f1fe7..3850642 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: SpaceMarkers Title: Spatial Interaction Markers -Version: 1.1.3 +Version: 1.1.4 Authors@R: c( person(given = "Atul", family = "Deshpande", email = "adeshpande@jhu.edu", role = c("aut", "cre"), comment = c(ORCID="0000-0001-5144-6924")), person(given = "Ludmila", family = "Danilova", email = "ldanilo1@jhmi.edu", role = "ctb"), @@ -26,6 +26,7 @@ Imports: spatstat.geom, ape, hdf5r, + nanoparquet, jsonlite, Matrix, qvalue, diff --git a/NAMESPACE b/NAMESPACE index 5485fba..0086a24 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ importFrom(jsonlite,read_json) importFrom(matrixStats,count) importFrom(matrixTests,row_kruskalwallis) importFrom(methods,slot) +importFrom(nanoparquet,read_parquet) importFrom(qvalue,qvalue) importFrom(rstatix,filter) importFrom(spatstat.explore,Smooth) diff --git a/NEWS.md b/NEWS.md index c667b0e..c89919a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,12 @@ +# SpaceMarkers 1.1.4 +* Updated SpaceMarkersMetric by fixing signage and log transformed to scale +magnitude +* Added getSpatialParameters_External which enables getting spatial parameters +from file or from the user +* Enabled includeSelf = TRUE in getInteractingGenes.R to improve hotspot +detection +* Enabled preprocessing.R to read expression and coordinates from VisiumHD + # SpaceMarkers 1.1.3 * Optimized the long running row.dunn.test() function diff --git a/R/preprocessing.R b/R/preprocessing.R index 40faffd..c3ec178 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1,5 +1,6 @@ #' @importFrom hdf5r h5file #' @importFrom jsonlite read_json +#' @importFrom nanoparquet read_parquet #' @importFrom utils read.csv #' @importFrom Matrix sparseMatrix #' @importFrom methods slot @@ -21,6 +22,7 @@ #' #' @param visiumDir A string path to the h5 file with expression information. #' @param h5filename A string of the name of the h5 file in the directory. +#' @param version A string specifying the version of the spaceranger data. #' @return A matrix of class dgeMatrix or Matrix that contains the expression #' info for each sample (cells) across multiple features (genes) #' @examples @@ -39,7 +41,12 @@ #' load10XExpr<- function(visiumDir=NULL, - h5filename='filtered_feature_bc_matrix.h5'){ + h5filename='filtered_feature_bc_matrix.h5', + version = NULL){ + if (version == "HD"){ + message("Assuming VisiumHD with 008um resolution as default") + visiumDir <- file.path(visiumDir,"binned_outputs/square_008um") + } h5FilePath <- dir(path = visiumDir,pattern = h5filename,full.names = TRUE) hf <- hdf5r::h5file(filename = h5FilePath, mode='r') mat <- names(hf) @@ -103,8 +110,15 @@ load10XCoords <- function(visiumDir, resolution = "lowres", version = NULL){ if("probe_set.csv" %in% dir(visiumDir)){ config_line <- readLines(paste0(visiumDir,"/probe_set.csv"), 1) version <- strsplit(config_line, "=")[[1]][2] + } else if ("tissue_positions.parquet" %in% dir( + visiumDir,"binned_outputs/square_008um/spatial")) { + version <- "HD" + visiumDir <- file.path(visiumDir,"binned_outputs/square_008um") + message(".parquet file found. + Assuming VisiumHD with 008um resolution as default") } else { - message("probe_set.csv not found. Assuming version 1.0.") + message("probe_set.csv or .parquet not found. + Assuming version 1.0.") version <- "1.0" } } @@ -116,15 +130,21 @@ load10XCoords <- function(visiumDir, resolution = "lowres", version = NULL){ has_header <- TRUE tissue_pos_name <- "tissue_positions.csv" } - spatial_dir <- paste0(visiumDir,'/spatial') + spatial_dir <- paste0(visiumDir,"/spatial") scale_json <- dir(spatial_dir, pattern = "scalefactors_json.json",full.names = TRUE) scale_values <- jsonlite::read_json(scale_json) scale_factor <- scale_values[grepl(resolution, names(scale_values))][[1]] - coord_file <- dir(spatial_dir, + if (version == "HD") { + tissue_pos_name <- "tissue_positions.parquet" + coord_file <- dir(spatial_dir, pattern = tissue_pos_name, full.names = TRUE) - - coord_values <- read.csv(coord_file, header = has_header) + coord_values <- nanoparquet::read_parquet(coord_file) + } else { + coord_file <- dir(spatial_dir, + pattern = tissue_pos_name, full.names = TRUE) + coord_values <- read.csv(coord_file, header = has_header) + } coord_values <- coord_values[,c(1,5,6)] coord_values[,2:3] <- coord_values[,2:3]*scale_factor names(coord_values) <- c("barcode","y","x") diff --git a/man/load10XExpr.Rd b/man/load10XExpr.Rd index 30fddd0..d60e17d 100755 --- a/man/load10XExpr.Rd +++ b/man/load10XExpr.Rd @@ -5,12 +5,18 @@ \title{load10XExpr load 10X Visium Expression Data} \usage{ -load10XExpr(visiumDir = NULL, h5filename = "filtered_feature_bc_matrix.h5") +load10XExpr( + visiumDir = NULL, + h5filename = "filtered_feature_bc_matrix.h5", + version = NULL +) } \arguments{ \item{visiumDir}{A string path to the h5 file with expression information.} \item{h5filename}{A string of the name of the h5 file in the directory.} + +\item{version}{A string specifying the version of the spaceranger data.} } \value{ A matrix of class dgeMatrix or Matrix that contains the expression From 2165ae0845705a53d8669271f9db4556fdbb94bc Mon Sep 17 00:00:00 2001 From: orian116 Date: Mon, 14 Oct 2024 19:13:34 -0400 Subject: [PATCH 5/6] Updated load10XCoords to be able to read tissue positions from VisiumHD which are stored in .parquet files. Added the nanoparquet function to depends in DESCRIPTION as well as updated package version. Updated NEWS.md. --- NEWS.md | 2 +- R/preprocessing.R | 9 ++------- man/load10XExpr.Rd | 8 +------- 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/NEWS.md b/NEWS.md index c89919a..6c9d62c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,7 +5,7 @@ magnitude from file or from the user * Enabled includeSelf = TRUE in getInteractingGenes.R to improve hotspot detection -* Enabled preprocessing.R to read expression and coordinates from VisiumHD +* Enabled load10XCoords to read coordinates from VisiumHD directory # SpaceMarkers 1.1.3 diff --git a/R/preprocessing.R b/R/preprocessing.R index c3ec178..a797287 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -22,7 +22,6 @@ #' #' @param visiumDir A string path to the h5 file with expression information. #' @param h5filename A string of the name of the h5 file in the directory. -#' @param version A string specifying the version of the spaceranger data. #' @return A matrix of class dgeMatrix or Matrix that contains the expression #' info for each sample (cells) across multiple features (genes) #' @examples @@ -41,12 +40,8 @@ #' load10XExpr<- function(visiumDir=NULL, - h5filename='filtered_feature_bc_matrix.h5', - version = NULL){ - if (version == "HD"){ - message("Assuming VisiumHD with 008um resolution as default") - visiumDir <- file.path(visiumDir,"binned_outputs/square_008um") - } + h5filename='filtered_feature_bc_matrix.h5'){ + h5FilePath <- dir(path = visiumDir,pattern = h5filename,full.names = TRUE) hf <- hdf5r::h5file(filename = h5FilePath, mode='r') mat <- names(hf) diff --git a/man/load10XExpr.Rd b/man/load10XExpr.Rd index d60e17d..30fddd0 100755 --- a/man/load10XExpr.Rd +++ b/man/load10XExpr.Rd @@ -5,18 +5,12 @@ \title{load10XExpr load 10X Visium Expression Data} \usage{ -load10XExpr( - visiumDir = NULL, - h5filename = "filtered_feature_bc_matrix.h5", - version = NULL -) +load10XExpr(visiumDir = NULL, h5filename = "filtered_feature_bc_matrix.h5") } \arguments{ \item{visiumDir}{A string path to the h5 file with expression information.} \item{h5filename}{A string of the name of the h5 file in the directory.} - -\item{version}{A string specifying the version of the spaceranger data.} } \value{ A matrix of class dgeMatrix or Matrix that contains the expression From e2b2c335aa39c2a067813333437ead8b55106ad5 Mon Sep 17 00:00:00 2001 From: orian116 Date: Mon, 14 Oct 2024 19:17:35 -0400 Subject: [PATCH 6/6] Updated load10XCoords to be able to read tissue positions from VisiumHD which are stored in .parquet files. Added the nanoparquet function to depends in DESCRIPTION as well as updated package version. Updated NEWS.md. NB: load10XExpr does not need to be updated explicitly --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 6c9d62c..2d60f9d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,5 @@ # SpaceMarkers 1.1.4 + * Updated SpaceMarkersMetric by fixing signage and log transformed to scale magnitude * Added getSpatialParameters_External which enables getting spatial parameters