diff --git a/DESCRIPTION b/DESCRIPTION index e5f1fe7..3850642 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: SpaceMarkers Title: Spatial Interaction Markers -Version: 1.1.3 +Version: 1.1.4 Authors@R: c( person(given = "Atul", family = "Deshpande", email = "adeshpande@jhu.edu", role = c("aut", "cre"), comment = c(ORCID="0000-0001-5144-6924")), person(given = "Ludmila", family = "Danilova", email = "ldanilo1@jhmi.edu", role = "ctb"), @@ -26,6 +26,7 @@ Imports: spatstat.geom, ape, hdf5r, + nanoparquet, jsonlite, Matrix, qvalue, diff --git a/NAMESPACE b/NAMESPACE index a73c9b5..0086a24 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export(getInteractingGenes) export(getPairwiseInteractingGenes) export(getSpatialFeatures) export(getSpatialParameters) +export(getSpatialParametersExternal) export(load10XCoords) export(load10XExpr) importFrom(Matrix,sparseMatrix) @@ -13,6 +14,7 @@ importFrom(jsonlite,read_json) importFrom(matrixStats,count) importFrom(matrixTests,row_kruskalwallis) importFrom(methods,slot) +importFrom(nanoparquet,read_parquet) importFrom(qvalue,qvalue) importFrom(rstatix,filter) importFrom(spatstat.explore,Smooth) diff --git a/NEWS.md b/NEWS.md index c667b0e..c89919a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,12 @@ +# SpaceMarkers 1.1.4 +* Updated SpaceMarkersMetric by fixing signage and log transformed to scale +magnitude +* Added getSpatialParameters_External which enables getting spatial parameters +from file or from the user +* Enabled includeSelf = TRUE in getInteractingGenes.R to improve hotspot +detection +* Enabled preprocessing.R to read expression and coordinates from VisiumHD + # SpaceMarkers 1.1.3 * Optimized the long running row.dunn.test() function diff --git a/R/getInteractingGenes.R b/R/getInteractingGenes.R index d22aed3..fb1ac41 100644 --- a/R/getInteractingGenes.R +++ b/R/getInteractingGenes.R @@ -4,10 +4,10 @@ find_pattern_hotspots <- function( spPatterns, params = NULL, patternName = "Pattern_1", outlier = "positive", - nullSamples = 100,...){ + nullSamples = 100, includeSelf = TRUE,...){ if (is.null(params)){ sigmaPair <- 10 - kernelthreshold <- 2 + kernelthreshold <- 3 } else { sigmaPair <- params["sigmaOpt"] kernelthreshold <- params["threshOpt"] @@ -21,6 +21,11 @@ find_pattern_hotspots <- function( x=spPatterns$x,y = spPatterns$y, window = allwin,marks = patternVector) Kact1 <- spatstat.explore::Smooth( X, at = "points", sigma = sigmaPair[1], ...) + if (includeSelf == TRUE){ + Kact1 <- spPatterns[,patternName] + Kact1 + } else { + Kact1 <- Kact1 + } Karr1 <- vapply(seq(1,nullSamples),function(i){ Xr<-X; spatstat.geom::marks(Xr) <- sample(spatstat.geom::marks(X)); @@ -83,12 +88,12 @@ getSpaceMarkersMetric <- function(interacting.genes){ for (i in seq(1,length(interacting_genes))) { if (all(dim(interacting_genes[[i]])>1)) { - Zsign <- (2*(-1+((interacting_genes[[i]]$Dunn.zP1_Int<0)| + Zsign <- (2*(-1+((interacting_genes[[i]]$Dunn.zP1_Int<0)& (interacting_genes[[i]]$Dunn.zP2_Int<0))*1)+1) - Zmag <- (interacting_genes[[i]]$Dunn.zP1_Int)* + Zmag <- abs((interacting_genes[[i]]$Dunn.zP1_Int)* (interacting_genes[[i]]$Dunn.zP2_Int)/ - (pmax(abs(interacting_genes[[i]]$Dunn.zP2_P1),1)) - interacting_genes[[i]]$SpaceMarkersMetric <- Zsign*Zmag + (pmax(abs(interacting_genes[[i]]$Dunn.zP2_P1),1))) + interacting_genes[[i]]$SpaceMarkersMetric <- Zsign*log2(Zmag+1) od <- order( interacting_genes[[i]]$SpaceMarkersMetric,decreasing=TRUE) interacting_genes[[i]] <- interacting_genes[[i]][od,] diff --git a/R/getSpatialParameters.R b/R/getSpatialParameters.R index 2331260..f8989b6 100644 --- a/R/getSpatialParameters.R +++ b/R/getSpatialParameters.R @@ -121,3 +121,67 @@ getSpatialParameters <- function(spatialPatterns,...){ return(optParams) } +#=================== +#' getSpatialParametersExternal +#' Manually obtain Optimal Parameters for Interacting cells +#' +#' This function calculates obtains a specified width of a distribution +#' (sigmaOpt) as well as the outlier threshold around the set of spots +#' (thresOpt) for each pattern from a latent feature space. +#' +#' @export +#' +#' @param spatialPatterns A data frame that contains the spatial coordinates +#' for each cell type. The column names must include 'x' and 'y' as well as a +#' set of numbered columns named 'Pattern_1.....N'. +#' @param visiumDir A string path specifying the location of the 10xVisium +#' directory +#' @param spatial A string path specifying the location of the spatial folder +#' containing the .json file of the spot characteristics +#' @param pattern A string specifying the name of the .json file +#' @param spotDiameter A numeric value specifying your desired sigma +#' @param threshold A numeric value specifying your hotspot threshold +#' @return a numeric matrix of sigmaOpts - the optimal width of the gaussian +#' distribution, and the thresOpt - outlier threshold around the set of spots +#' for each pattern +#' @examples +#' library(SpaceMarkers) +#' # Create test data +#' cells <- c() +#' test_num <- 500 +#' for(i in 1:test_num){ +#' cells[length(cells)+1] <- paste0("cell_",i) +#' } +#' spPatterns <- data.frame(barcode = cells, +#' y = runif(test_num, min=0, max=test_num), +#' x = runif(test_num, min=0, max=test_num), +#' Pattern_1 = runif(test_num, min=0, max=1), +#' Pattern_2 = runif(test_num, min=0, max=1) ) +#' # Call the getSpatialParameters function with the test data +#' optParams <- getSpatialParametersExternal(spPatterns, spotDiameter = 10) +#' + +getSpatialParametersExternal <- function(spatialPatterns,visiumDir = ".", + spatialDir ="spatial", + pattern = "scalefactors_json.json", + spotDiameter = NULL,threshold = 3) { + patternList <- setdiff(colnames(spatialPatterns),c("barcode","x","y")) + if (!is.null(spotDiameter)) { + sigmaOpt <- spotDiameter + threshOpt <- threshold + } else if (is.null(spotDiameter)) { + message("Assuming Visium folder with .json file and spot diamater exists") + scale_json <- dir(paste0(visiumDir,"/",spatialDir), + pattern = pattern,full.names = TRUE) + scale_values <- jsonlite::read_json(scale_json) + sigmaOpt <- scale_values[[4]] + threshOpt <- threshold + + } else { + stop("Please specify the spot diameter") + } + optParams <-matrix(c(sigmaOpt,threshOpt),nrow = 2,ncol = length(patternList)) + colnames(optParams) <- patternList + rownames(optParams) <- c("sigmaOpt","threshOpt") + return(optParams) +} diff --git a/R/preprocessing.R b/R/preprocessing.R index 40faffd..c3ec178 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1,5 +1,6 @@ #' @importFrom hdf5r h5file #' @importFrom jsonlite read_json +#' @importFrom nanoparquet read_parquet #' @importFrom utils read.csv #' @importFrom Matrix sparseMatrix #' @importFrom methods slot @@ -21,6 +22,7 @@ #' #' @param visiumDir A string path to the h5 file with expression information. #' @param h5filename A string of the name of the h5 file in the directory. +#' @param version A string specifying the version of the spaceranger data. #' @return A matrix of class dgeMatrix or Matrix that contains the expression #' info for each sample (cells) across multiple features (genes) #' @examples @@ -39,7 +41,12 @@ #' load10XExpr<- function(visiumDir=NULL, - h5filename='filtered_feature_bc_matrix.h5'){ + h5filename='filtered_feature_bc_matrix.h5', + version = NULL){ + if (version == "HD"){ + message("Assuming VisiumHD with 008um resolution as default") + visiumDir <- file.path(visiumDir,"binned_outputs/square_008um") + } h5FilePath <- dir(path = visiumDir,pattern = h5filename,full.names = TRUE) hf <- hdf5r::h5file(filename = h5FilePath, mode='r') mat <- names(hf) @@ -103,8 +110,15 @@ load10XCoords <- function(visiumDir, resolution = "lowres", version = NULL){ if("probe_set.csv" %in% dir(visiumDir)){ config_line <- readLines(paste0(visiumDir,"/probe_set.csv"), 1) version <- strsplit(config_line, "=")[[1]][2] + } else if ("tissue_positions.parquet" %in% dir( + visiumDir,"binned_outputs/square_008um/spatial")) { + version <- "HD" + visiumDir <- file.path(visiumDir,"binned_outputs/square_008um") + message(".parquet file found. + Assuming VisiumHD with 008um resolution as default") } else { - message("probe_set.csv not found. Assuming version 1.0.") + message("probe_set.csv or .parquet not found. + Assuming version 1.0.") version <- "1.0" } } @@ -116,15 +130,21 @@ load10XCoords <- function(visiumDir, resolution = "lowres", version = NULL){ has_header <- TRUE tissue_pos_name <- "tissue_positions.csv" } - spatial_dir <- paste0(visiumDir,'/spatial') + spatial_dir <- paste0(visiumDir,"/spatial") scale_json <- dir(spatial_dir, pattern = "scalefactors_json.json",full.names = TRUE) scale_values <- jsonlite::read_json(scale_json) scale_factor <- scale_values[grepl(resolution, names(scale_values))][[1]] - coord_file <- dir(spatial_dir, + if (version == "HD") { + tissue_pos_name <- "tissue_positions.parquet" + coord_file <- dir(spatial_dir, pattern = tissue_pos_name, full.names = TRUE) - - coord_values <- read.csv(coord_file, header = has_header) + coord_values <- nanoparquet::read_parquet(coord_file) + } else { + coord_file <- dir(spatial_dir, + pattern = tissue_pos_name, full.names = TRUE) + coord_values <- read.csv(coord_file, header = has_header) + } coord_values <- coord_values[,c(1,5,6)] coord_values[,2:3] <- coord_values[,2:3]*scale_factor names(coord_values) <- c("barcode","y","x") diff --git a/man/getSpatialParametersExternal.Rd b/man/getSpatialParametersExternal.Rd new file mode 100644 index 0000000..53cbf83 --- /dev/null +++ b/man/getSpatialParametersExternal.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getSpatialParameters.R +\name{getSpatialParametersExternal} +\alias{getSpatialParametersExternal} +\title{getSpatialParametersExternal +Manually obtain Optimal Parameters for Interacting cells} +\usage{ +getSpatialParametersExternal( + spatialPatterns, + visiumDir = ".", + spatialDir = "spatial", + pattern = "scalefactors_json.json", + spotDiameter = NULL, + threshold = 3 +) +} +\arguments{ +\item{spatialPatterns}{A data frame that contains the spatial coordinates +for each cell type. The column names must include 'x' and 'y' as well as a +set of numbered columns named 'Pattern_1.....N'.} + +\item{visiumDir}{A string path specifying the location of the 10xVisium +directory} + +\item{pattern}{A string specifying the name of the .json file} + +\item{spotDiameter}{A numeric value specifying your desired sigma} + +\item{threshold}{A numeric value specifying your hotspot threshold} + +\item{spatial}{A string path specifying the location of the spatial folder +containing the .json file of the spot characteristics} +} +\value{ +a numeric matrix of sigmaOpts - the optimal width of the gaussian +distribution, and the thresOpt - outlier threshold around the set of spots +for each pattern +} +\description{ +This function calculates obtains a specified width of a distribution +(sigmaOpt) as well as the outlier threshold around the set of spots +(thresOpt) for each pattern from a latent feature space. +} +\examples{ +library(SpaceMarkers) +# Create test data +cells <- c() +test_num <- 500 +for(i in 1:test_num){ + cells[length(cells)+1] <- paste0("cell_",i) +} +spPatterns <- data.frame(barcode = cells, +y = runif(test_num, min=0, max=test_num), +x = runif(test_num, min=0, max=test_num), +Pattern_1 = runif(test_num, min=0, max=1), +Pattern_2 = runif(test_num, min=0, max=1) ) +# Call the getSpatialParameters function with the test data +optParams <- getSpatialParametersExternal(spPatterns, spotDiameter = 10) + +} diff --git a/man/load10XExpr.Rd b/man/load10XExpr.Rd index 30fddd0..d60e17d 100755 --- a/man/load10XExpr.Rd +++ b/man/load10XExpr.Rd @@ -5,12 +5,18 @@ \title{load10XExpr load 10X Visium Expression Data} \usage{ -load10XExpr(visiumDir = NULL, h5filename = "filtered_feature_bc_matrix.h5") +load10XExpr( + visiumDir = NULL, + h5filename = "filtered_feature_bc_matrix.h5", + version = NULL +) } \arguments{ \item{visiumDir}{A string path to the h5 file with expression information.} \item{h5filename}{A string of the name of the h5 file in the directory.} + +\item{version}{A string specifying the version of the spaceranger data.} } \value{ A matrix of class dgeMatrix or Matrix that contains the expression