-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Hotspots and help(SpaceMarkersFunction) #48
base: main
Are you sure you want to change the base?
Changes from all commits
beb9598
223b6ea
8702519
c5f07bf
d2f8a78
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,75 @@ | ||
## author: Atul Deshpande | ||
## email: [email protected] | ||
|
||
find_pattern_hotspots <- function( | ||
#=================== | ||
#' @title Calculate influence of spatially varying features | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Identify hotspots of spatial pattern influence |
||
#' @description This function calculates 'hotspots' which are regions of high | ||
#' spatial influence based on an outlier threshold from a null distribution. | ||
#' @export | ||
#' @family getIntGenes | ||
#' @param spPatterns A data frame that contains the spatial coordinates | ||
#' and metrics for spatial features (cell types/cell processes). The column | ||
#' names must include 'x' and 'y' as well as the spatially varying features. | ||
#' @param params a named vector of the optimal sigma and threshold for a | ||
#' given spatial pattern. The names are should be 'sigmaOpt' and 'threshOpt'. | ||
#' The default value is NULL. | ||
#' @param patternName a character string that specifies the pattern of | ||
#' interest | ||
#' @param outlier a character string specifying whether to apply the | ||
#' outlier threshold to the kernel density distribution in a one-sided manner | ||
#' (specify 'positive' the default) or in a two sided manner (specify | ||
#' 'two.sided'). | ||
#' @param nullSamples a numeric values specifying the number of spatial patterns | ||
#' to randomly sample for a null distribution. | ||
#' @param includeSelf a logic value specifying whether to consider the | ||
#' spatial influence the pattern has on surrounding regions only (set to FALSE), | ||
#' or whether to also consider the influence of the pattern itself (set to TRUE | ||
#' , the default). | ||
#' @param ... Arguments passed to methods | ||
#' @return a character vector with the spatial feature name if the spatial | ||
#' influence exceeded the threshold for that spot/cell, and NA otherwise | ||
#' @examples | ||
#' library(SpaceMarkers) | ||
#' #Visium data links | ||
#' urls <- read.csv(system.file("extdata","visium_data.txt", | ||
#' package="SpaceMarkers",mustWork = TRUE)) | ||
#' sp_url <- urls[["visium_url"]][2] | ||
#' #Remove present Directories if any | ||
#' unlink(basename(sp_url)) | ||
#' unlink("spatial", recursive = TRUE) | ||
#' #Obtaining CoGAPS Patterns i.e Spatial Features | ||
#' cogaps_result <- readRDS(system.file("extdata","CoGAPS_result.rds", | ||
#' package="SpaceMarkers",mustWork = TRUE)) | ||
#' spFeatures <- slot(cogaps_result,"sampleFactors") | ||
#' #Obtaining Spatial Coordinates | ||
#' download.file(sp_url, basename(sp_url), mode = "wb") | ||
#' untar(basename(sp_url)) | ||
#' spCoords <- load10XCoords(visiumDir = ".", version = "1.0") | ||
#' rownames(spCoords) <- spCoords$barcode | ||
#' #Match Dimensions | ||
#' barcodes <- intersect(rownames(spFeatures),spCoords$barcode) | ||
#' spCoords <- spCoords[barcodes,] | ||
#' spFeatures <- spFeatures[barcodes,] | ||
#' spPatterns <- cbind(spCoords,spFeatures[barcodes,]) | ||
#' spPatterns<-spPatterns[c("barcode","y","x","Pattern_1","Pattern_5")] | ||
#' data("optParams") | ||
#' hotspots <- findPatternHotspots( | ||
#' spPatterns = spPatterns, | ||
#' patternName = "Pattern_1", | ||
#' params = optParams[,"Pattern_1"], | ||
#' outlier = "positive",nullSamples = 100,includeSelf = TRUE) | ||
#' #Remove present Directories if any | ||
#' unlink(basename(sp_url)) | ||
#' unlink("spatial", recursive = TRUE) | ||
#' | ||
|
||
findPatternHotspots <- function( | ||
spPatterns, params = NULL, patternName = "Pattern_1", | ||
outlier = "positive", | ||
nullSamples = 100, includeSelf = TRUE,...){ | ||
if (is.null(params)){ | ||
sigmaPair <- 10 | ||
kernelthreshold <- 3 | ||
kernelthreshold <- 4 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just curious, why not 5? |
||
} else { | ||
sigmaPair <- params["sigmaOpt"] | ||
kernelthreshold <- params["threshOpt"] | ||
|
@@ -106,9 +168,8 @@ getSpaceMarkersMetric <- function(interacting.genes){ | |
|
||
|
||
#=================== | ||
#' getInteractingGenes | ||
#' Calculate Interaction Regions and Associated Genes | ||
#' This function calculates statistically significant genes using a | ||
#' @title Calculate Interaction Regions and Associated Genes | ||
#' @description This function calculates statistically significant genes using a | ||
#' non-parametric Kruskal-Wallis test for genes in any one region | ||
#' of influence and a post hoc Dunn's test is used for analysis of | ||
#' genes between regions. | ||
|
@@ -118,17 +179,13 @@ getSpaceMarkersMetric <- function(interacting.genes){ | |
#' @param reconstruction reconstruction of the data matrix from latent | ||
#' spaces. Required for "residual" mode. | ||
#' @param spPatterns A data frame that contains the spatial coordinates | ||
#' for each cell type. The column names must include 'x' and 'y' as well as a | ||
#' set of numbered columns named 'Pattern_1.....N'. | ||
#' @param optParams a matrix with dimensions 2 X N, | ||
#' where N is the number | ||
#' of patterns with optimal parameters for outlier | ||
#' detection calculated from function getSpatialParameters(). The first row | ||
#' contains the kernel width sigmaOpt for each | ||
#' pattern, and the second row is the threshOpt (outlier threshold) for each | ||
#' pattern. Users can also input their | ||
#' preferred param values. | ||
#' The default value is NULL. | ||
#' and metrics for spatial features (cell types/cell processes). The column | ||
#' names must include 'x' and 'y' as well as the spatially varying features. | ||
#' @param optParams a matrix with dimensions 2 X N, where N is the number | ||
#' of spatial patterns with optimal parameters. The first row contains the | ||
#' kernel width 'sigmaOpt' for each pattern, and the second row is the | ||
#' threshOpt (outlier threshold) for each pattern. Users can also input their | ||
#' preferred param values. The default value is NULL. | ||
#' @param refPattern a character string that specifies the pattern whose | ||
#' "interaction" with every other pattern we want | ||
#' to study. The default value is "Pattern_1". | ||
|
@@ -145,7 +202,6 @@ getSpaceMarkersMetric <- function(interacting.genes){ | |
#' and "overlap". In enrichment mode, all genes are returned, ranked by | ||
#' the SpaceMarkers metric. In overlap mode, only the genes which are | ||
#' significantly overexpressed in the interaction region are returned. | ||
#' | ||
#' @param ... Arguments passed to methods | ||
#' @return a list of data frames with information about the interacting genes | ||
#' of the refPattern and each latent feature pattern matrix | ||
|
@@ -226,7 +282,7 @@ getInteractingGenes <- function(data, spPatterns, refPattern="Pattern_1", | |
hotspots <- c() | ||
for (patternName in pattList) { | ||
hotspots <- cbind( | ||
hotspots,find_pattern_hotspots( | ||
hotspots,findPatternHotspots( | ||
spPatterns=spPatterns,patternName=patternName, | ||
params=optParams[,patternName],outlier = "positive") ) | ||
} | ||
|
@@ -428,10 +484,9 @@ getPatternPairs <- function(patternList, patternPairs) { | |
else | ||
stop("patternPairs must either be a 2-column matrix | ||
or a list of vectors.") | ||
|
||
mess <- paste0(setdiff(patternPairs, patternList)," ") | ||
if (!all(patternPairs %in% patternList)) | ||
stop("Following are not pattern names: ", | ||
paste0(setdiff(patternPairs, patternList)," ")) | ||
stop("Following are not pattern names: ",mess ) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. stop mess :) (just noticing, no call to action) |
||
} | ||
return(patternPairs) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -74,22 +74,20 @@ getOptimalSigmaThresh <- function(pattern, locs, sigVec, threshVec,...){ | |
return(data.frame( | ||
sigmaOpt=smallsigVec[sigOpt1_ind],threshOpt=threshVec[threshOpt1_ind])) | ||
} | ||
|
||
#=================== | ||
#' getSpatialParameters | ||
#' Calculate the Optimal Parameters for Interacting Cells | ||
#' | ||
#' This function calculates the optimal width of the gaussian distribution | ||
#' (sigmaOpt) as well as the outlier threshold around the set of spots | ||
#' (thresOpt) for each pattern from a latent feature space. | ||
#' | ||
#' Calculate the optimal parameters from spatial kernel density for | ||
#' cell-cell interactions | ||
#' @description This function uses Morans.I to calculate the optimal width of | ||
#' the kernel density (sigmaOpt) as well as the outlier threshold around the set | ||
#' of spots (threshOpt) for a null distribution. | ||
#' @export | ||
#' | ||
#' @param spatialPatterns A data frame that contains the spatial coordinates | ||
#' for each cell type. The column names must include 'x' and 'y' as well as a | ||
#' set of numbered columns named 'Pattern_1.....N'. | ||
#' @param ... Arguments passed to methods | ||
#' @return a numeric matrix of sigmaOpts - the optimal width of the gaussian | ||
#' distribution, and the thresOpt - outlier threshold around the set of spots | ||
#' distribution, and the threshOpt - outlier threshold around the set of spots | ||
#' for each pattern | ||
#' @examples | ||
#' library(SpaceMarkers) | ||
|
@@ -107,30 +105,28 @@ getOptimalSigmaThresh <- function(pattern, locs, sigVec, threshVec,...){ | |
#' # Call the getSpatialParameters function with the test data | ||
#' optParams <- getSpatialParameters(spPatterns) | ||
#' | ||
|
||
getSpatialParameters <- function(spatialPatterns,...){ | ||
good_gene_threshold <- 3; | ||
.Deprecated(new = "getSpatialParamsExternal") | ||
sigmaRes <- max(floor(min(diff(range(spatialPatterns$x)), | ||
diff(range(spatialPatterns$y)))/250),1) | ||
sigVec <- seq(2,40*sigmaRes,sigmaRes) | ||
threshVec <- seq(1,3,0.1) | ||
patternList <- setdiff(colnames(spatialPatterns),c("barcode","x","y")) | ||
optParams<-vapply(patternList,function(i) unlist(getOptimalSigmaThresh( | ||
pattern=spatialPatterns[,i],locs=data.frame( | ||
x=spatialPatterns$x,y=spatialPatterns$y),sigVec,threshVec)),numeric(2)) | ||
x=spatialPatterns$x,y=spatialPatterns$y),sigVec,threshVec,...)),numeric(2)) | ||
return(optParams) | ||
} | ||
|
||
#=================== | ||
#' getSpatialParametersExternal | ||
#' Manually obtain Optimal Parameters for Interacting cells | ||
#' | ||
#' This function calculates obtains a specified width of a distribution | ||
#' (sigmaOpt) as well as the outlier threshold around the set of spots | ||
#' (thresOpt) for each pattern from a latent feature space. | ||
#' Read optimal parameters for spatial kernel density from user input or .json | ||
#' file | ||
#' | ||
#' @description This function obtains the width of a spatial kernel density | ||
#' (sigma) from either the user input or from a scale factors .json file. | ||
#' The outlier threshold around the set of spots (threshold) for each pattern is | ||
#' specified by the user (default is 3). | ||
#' @export | ||
#' | ||
#' @param spatialPatterns A data frame that contains the spatial coordinates | ||
#' for each cell type. The column names must include 'x' and 'y' as well as a | ||
#' set of numbered columns named 'Pattern_1.....N'. | ||
|
@@ -139,10 +135,12 @@ getSpatialParameters <- function(spatialPatterns,...){ | |
#' @param spatialDir A string path specifying the location of the spatial folder | ||
#' containing the .json file of the spot characteristics | ||
#' @param pattern A string specifying the name of the .json file | ||
#' @param spotDiameter A numeric value specifying your desired sigma | ||
#' @param sigma A numeric value specifying your desired sigma | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd rather note what sigma is and how it affects the result instead of kind of obvious "A numeric value specifying your desired sigma" |
||
#' @param threshold A numeric value specifying your hotspot threshold | ||
#' @param resolution A string specifying image resolution | ||
#' @param ... Arguments passed to methods | ||
#' @return a numeric matrix of sigmaOpts - the optimal width of the gaussian | ||
#' distribution, and the thresOpt - outlier threshold around the set of spots | ||
#' distribution, and the threshOpt - outlier threshold around the set of spots | ||
#' for each pattern | ||
#' @examples | ||
#' library(SpaceMarkers) | ||
|
@@ -157,29 +155,50 @@ getSpatialParameters <- function(spatialPatterns,...){ | |
#' x = runif(test_num, min=0, max=test_num), | ||
#' Pattern_1 = runif(test_num, min=0, max=1), | ||
#' Pattern_2 = runif(test_num, min=0, max=1) ) | ||
#' # Call the getSpatialParameters function with the test data | ||
#' optParams <- getSpatialParametersExternal(spPatterns, spotDiameter = 10) | ||
#' # Call the getSpatialParamsExternal function with the test data | ||
#' optParams <- getSpatialParamsExternal(spPatterns, sigma = 10) | ||
#' | ||
|
||
getSpatialParametersExternal <- function(spatialPatterns,visiumDir = ".", | ||
getSpatialParamsExternal <- function(spatialPatterns,visiumDir = ".", | ||
spatialDir ="spatial", | ||
pattern = "scalefactors_json.json", | ||
spotDiameter = NULL,threshold = 3) { | ||
sigma = NULL,threshold = 3, | ||
resolution = | ||
c("lowres","hires","fullres"), ...) { | ||
patternList <- setdiff(colnames(spatialPatterns),c("barcode","x","y")) | ||
if (!is.null(spotDiameter)) { | ||
sigmaOpt <- spotDiameter | ||
if (!is.null(sigma)) { | ||
sigmaOpt <- sigma | ||
threshOpt <- threshold | ||
} else if (is.null(spotDiameter)) { | ||
message("Assuming Visium folder with .json file and spot diamater exists") | ||
scale_values <- jsonlite::read_json(file.path(visiumDir,spatialDir, | ||
} else if (is.null(sigma) & | ||
file.exists(file.path(visiumDir,spatialDir,pattern))) { | ||
message("Reading spot diameter from specified .json file") | ||
scale_values <- jsonlite::read_json(file.path(visiumDir,spatialDir, | ||
pattern)) | ||
sigmaOpt <- scale_values$spot_diameter_fullres | ||
if ("lowres" %in% resolution){ | ||
resolution <- "lowres" | ||
} else if ("hires" %in% resolution){ | ||
resolution <- "hires" | ||
} else if ("fullres" %in% resolution){ | ||
resolution <- "fullres" | ||
} else { | ||
stop("Resolution argument not recognized. | ||
Please supply either lowres, hires or fullres.") | ||
} | ||
|
||
threshOpt <- threshold | ||
sigmaOpt <- as.numeric(scale_values$spot_diameter_fullres) | ||
if (resolution != "fullres") { | ||
scale_factor <- scale_values[grepl(resolution, | ||
names(scale_values))][[1]] | ||
sigmaOpt <- sigmaOpt * as.numeric(scale_factor) | ||
} | ||
|
||
|
||
} else { | ||
stop("Please specify the spot diameter or correct path to .json") | ||
stop("Please specify the sigma or correct path to .json") | ||
} | ||
optParams <-matrix(c(sigmaOpt,threshOpt),nrow = 2,ncol = length(patternList)) | ||
optParams <-matrix(c(sigmaOpt,threshOpt),nrow = 2, | ||
ncol = length(patternList)) | ||
colnames(optParams) <- patternList | ||
rownames(optParams) <- c("sigmaOpt","threshOpt") | ||
return(optParams) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great, it's comforting to see we're getting back to harmonized fun names