mod_docstrings5

theislab · Jan 3, 2025 · 62c0697 · 62c0697
1 parent e9f7a9f
commit 62c0697
Show file tree

Hide file tree

Showing 8 changed files with 97 additions and 220 deletions.
diff --git a/src/troutpy/pl/plotting.py b/src/troutpy/pl/plotting.py
@@ -153,10 +153,10 @@ def plot_crosstab(data, xvar: str = '', yvar: str = '', normalize=True, axis=1,
     normalize (bool, optional): Whether to normalize the cross-tabulated data (percentages). If True, the data will be normalized.
     axis (int): The axis to normalize across. Use `1` for row normalization and `0` for column normalization.
     kind (str, optional): The kind of plot to generate. Options include:
-        - 'barh': Horizontal bar plot
-        - 'bar': Vertical bar plot
-        - 'heatmap': Heatmap visualization
-        - 'clustermap': Clustermap visualization
+    - 'barh': Horizontal bar plot
+    - 'bar': Vertical bar plot
+    - 'heatmap': Heatmap visualization
+    - 'clustermap': Clustermap visualization
     save (bool): If True, the plot will be saved to a file.
     figures_path (str, optional): The directory path where the figure should be saved. If not specified, the plot will be saved in the current directory.
     stacked (bool, optional): If True, the bar plots will be stacked. Only applicable for 'barh' and 'bar' plot kinds.

diff --git a/src/troutpy/pp/compute.py b/src/troutpy/pp/compute.py
@@ -45,9 +45,9 @@ def define_extracellular(
     sdata (SpatialData): A spatial data object containing transcriptomic information.
     layer (str): The layer in `sdata.points` containing the transcript data to process.
     method (str): The method to define extracellular transcripts. Options:
-        - 'segmentation_free': Uses segmentation-free clustering results.
-        - 'nuclei': Uses overlap with nuclear annotations to classify extracellular transcripts.
-        - 'cells': Classifies transcripts not assigned to a cell as extracellular.
+    - 'segmentation_free': Uses segmentation-free clustering results.
+    - 'nuclei': Uses overlap with nuclear annotations to classify extracellular transcripts.
+    - 'cells': Classifies transcripts not assigned to a cell as extracellular.
     min_prop_of_extracellular (float, optional): Minimum proportion of transcripts in a cluster required to be extracellular for it to be classified as such (used only with 'segmentation_free' method).
     unassigned_to_cell_tag (str, optional): Tag indicating transcripts not assigned to any cell.
     copy (bool): If True, returns a copy of the updated spatial data. If False, updates the `sdata` object in-place.

diff --git a/src/troutpy/tl/NMF.py b/src/troutpy/tl/NMF.py
@@ -15,24 +15,15 @@ def apply_nmf_to_adata(adata, n_components=20, subsample_percentage=1.0, save=Fa
     Applies Non-Negative Matrix Factorization (NMF) to an AnnData object to reduce the dimensionality of gene expression data.
 
     Parameters:
-    adata : AnnData
-        The AnnData object containing the gene expression matrix (`adata.X`) along with cell and gene annotations.
-    n_components : int, optional, default: 20
-        The number of components (latent factors) to extract from the NMF model.
-    subsample_percentage : float, optional, default: 1.0
-        The percentage of cells to sample before applying NMF. A value of 1.0 means no subsampling.
-    save : bool, optional, default: False
-        If True, the factor loadings (`H`) and factor scores (`W`) will be saved as Parquet files to the specified output path.
-    output_path : str, optional, default: ''
-        The directory where the factor loadings and scores will be saved if `save` is True.
-    random_state : int, optional, default: None
-        The random seed used for initializing the NMF model. If None, the random seed is not fixed.
+    adata (AnnData): The AnnData object containing the gene expression matrix (`adata.X`) along with cell and gene annotations.
+    n_components (int, optional): The number of components (latent factors) to extract from the NMF model.
+    subsample_percentage (float): The percentage of cells to sample before applying NMF. A value of 1.0 means no subsampling.
+    save (bool, optional): If True, the factor loadings (`H`) and factor scores (`W`) will be saved as Parquet files to the specified output path.
+    output_path (str, optional): The directory where the factor loadings and scores will be saved if `save` is True.
+    random_state (int, optional): The random seed used for initializing the NMF model. If None, the random seed is not fixed.
 
     Returns:
-    adata : AnnData
-        The input AnnData object with the NMF results added:
-        - `adata.obsm['W_nmf']` contains the cell factors (factor scores for each cell).
-        - `adata.uns['H_nmf']` contains the gene loadings (factor loadings for each gene).
+    adata (AnnData): The input AnnData object with the NMF results added: `adata.obsm['W_nmf']` contains the cell factors (factor scores for each cell) and `adata.uns['H_nmf']` contains the gene loadings (factor loadings for each gene).
 
     Notes:
     - The NMF algorithm is initialized using a random method for factorization (`init='random'`).
@@ -73,22 +64,14 @@ def nmf(
     """Applies Non-negative Matrix Factorization (NMF) on filtered data based on feature_name and bin_id.
 
     Parameters:
-    - sdata : spatial data object
-        Input spatial data containing transcript and bin data.
-    - layer : str, optional
-        Layer name of the data that contains extracellular transcripts (default: 'extracellular_transcripts_enriched').
-    - feature_key : str, optional
-        Column name for the transcript feature (default: 'feature_name').
-    - bin_key : str, optional
-        Column name for bin IDs (default: 'bin_id').
-    - density_table_key : str, optional
-        Key to retrieve the density table from sdata (default: 'segmentation_free_table').
-    - n_components : int, optional
-        Number of components for NMF (default: 20).
-    - subsample_percentage : float, optional
-        Percentage of data to use for NMF (default: 0.1).
-    - random_state : int, optional
-        Random state for NMF initialization for reproducibility (default: None).
+    - sdata (SpatialData): Input spatial data containing transcript and bin data.
+    - layer (str, optional): Layer name of the data that contains extracellular transcripts (default: 'extracellular_transcripts_enriched').
+    - feature_key (str, optional): Column name for the transcript feature (default: 'feature_name').
+    - bin_key (str, optional): Column name for bin IDs (default: 'bin_id').
+    - density_table_key (str, optional): Key to retrieve the density table from sdata (default: 'segmentation_free_table').
+    - n_components (int, optional): Number of components for NMF (default: 20).
+    - subsample_percentage (float, optional): Percentage of data to use for NMF (default: 0.1).
+    - random_state (int, optional): Random state for NMF initialization for reproducibility (default: None).
 
     Returns:
     - sdata : Updated spatial data object with NMF components stored.

diff --git a/src/troutpy/tl/interactions.py b/src/troutpy/tl/interactions.py
@@ -54,7 +54,6 @@ def get_number_of_communication_genes(
     number_interactions_df.index.name='Source cell type'  
     return number_interactions_df
 
-
 def get_gene_interaction_strength(
     source_proportions: pd.DataFrame,  # gene by source cell type
     target_proportions: pd.DataFrame,  # gene by target cell type
@@ -68,30 +67,16 @@ def get_gene_interaction_strength(
     This function calculates the interaction strength between source and target cell types for a specified gene by multiplying the proportions of the gene in the source and target cell types. The interaction matrix can be visualized using a chord diagram, with the option to save the resulting plot.
 
     Parameters:
-    - source_proportions : pd.DataFrame
-        A DataFrame where rows represent genes and columns represent source cell types. Each value indicates the proportion of the gene in the respective source cell type.
-
-    - target_proportions : pd.DataFrame
-        A DataFrame where rows represent genes and columns represent target cell types. Each value indicates the proportion of the gene in the respective target cell type.
-
-    - gene_symbol : str, optional
-        The gene symbol for which the interaction strength is to be computed and visualized (default: '').
-
-    - return_interactions : bool, optional
-        If True, returns the interaction matrix as a NumPy array (default: False).
-
-    - save : bool, optional
-        If True, saves the chord diagram plot to the specified output path (default: False).
-
-   - output_path : str, optional
-        The directory path where the plot will be saved. If `save=True`, this path will be used to store the file (default: ''). A 'figures' subdirectory is created if it doesn't exist.
-
-    - format : str, optional
-        The file format for saving the plot (e.g., 'pdf', 'png'). This is used only if `save=True` (default: 'pdf').
+    - source_proportions (pd.DataFrame): A DataFrame where rows represent genes and columns represent source cell types. Each value indicates the proportion of the gene in the respective source cell type.
+    - target_proportions (pd.DataFrame): A DataFrame where rows represent genes and columns represent target cell types. Each value indicates the proportion of the gene in the respective target cell type.
+    - gene_symbol (str, optional): The gene symbol for which the interaction strength is to be computed and visualized (default: '').
+    - return_interactions (bool, optional): If True, returns the interaction matrix as a NumPy array (default: False).
+    - save (bool, optional): If True, saves the chord diagram plot to the specified output path (default: False).
+    - output_path (str, optional): The directory path where the plot will be saved. If `save=True`, this path will be used to store the file (default: ''). A 'figures' subdirectory is created if it doesn't exist.
+    - format (str, optional): The file format for saving the plot (e.g., 'pdf', 'png'). This is used only if `save=True` (default: 'pdf').
 
     Returns:
     - None or np.ndarray
-        If `return_interactions=True`, the function returns the interaction matrix as a NumPy array. Otherwise, the function generates a chord diagram plot.
 
     Notes:
     - The function computes the interaction matrix by multiplying the proportions of the gene in the source and target cell types.

diff --git a/src/troutpy/tl/quantify_xrna.py b/src/troutpy/tl/quantify_xrna.py
@@ -26,26 +26,17 @@ def spatial_variability(
     Computes spatial variability of extracellular RNA using Moran's I.
 
     Parameters:
-    - sdata : SpatialData
-        The spatial transcriptomics dataset in SpatialData format.
-    - coords_keys : list of str, optional
-        The keys for spatial coordinates in the dataset (default: ['x', 'y']).
-    - gene_id_key : str, optional
-        The key for gene identifiers in the dataset (default: 'feature_name').
-    - n_neighbors : int, optional
-        Number of neighbors to use for computing spatial neighbors (default: 10).
-    - resolution : int, optional
-        The resolution for kernel density estimation (default: 1000).
-    - binsize : int, optional
-        The binsize for kernel density estimation (default: 20).
-    - n_threads : int, optional
-        The number of threads for LazyKDE processing (default: 1).
-    - spatial_autocorr_mode : str, optional
-        The mode for spatial autocorrelation computation (default: "moran").
+    - sdata (SpatialData): The spatial transcriptomics dataset in SpatialData format.
+    - coords_keys (list of str): The keys for spatial coordinates in the dataset (default: ['x', 'y']).
+    - gene_id_key (str, optional): The key for gene identifiers in the dataset (default: 'feature_name').
+    - n_neighbors (int, optional): Number of neighbors to use for computing spatial neighbors (default: 10).
+    - resolution (int, optional): The resolution for kernel density estimation (default: 1000).
+    - binsize (int, optional): The binsize for kernel density estimation (default: 20).
+    - n_threads (int, optional): The number of threads for LazyKDE processing (default: 1).
+    - spatial_autocorr_mode (str, optional): The mode for spatial autocorrelation computation (default: "moran").
 
     Returns:
-    pd.DataFrame
-        A DataFrame containing Moran's I values for each gene, indexed by gene names.
+    - sdata (SpatialData): The SpatialData object containing Moran's I values for each gene, indexed by gene names.
     """
     # Step 1: Extract and preprocess data
     data = sdata.points['transcripts'][coords_keys + ['extracellular', gene_id_key]].compute()
@@ -109,25 +100,13 @@ def create_xrna_metadata(
     Creates a new table within the SpatialData object that contains a 'gene' column with the unique gene names extracted from the specified points layer.
 
     Parameters:
-    - sdata : SpatialData
-        The SpatialData object to modify.
-    - points_layer : str, optional
-        The name of the layer in `sdata.points` from which to extract gene names. Default is 'transcripts'.
-    - gene_key : str, optional
-        The key in the `points_layer` dataframe that contains the gene names.Default is 'feature_name'.
-    - copy : bool, optional
-        - If `True`, returns a copy of the `SpatialData` object with the new table added.
-        - If `False`, modifies the original `SpatialData` object in place. Default is `False`.
+    - sdata (SpatialData): The SpatialData object to modify.
+    - points_layer (str, optional): The name of the layer in `sdata.points` from which to extract gene names. Default is 'transcripts'.
+    - gene_key (str, optional): The key in the `points_layer` dataframe that contains the gene names. Default is 'feature_name'.
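+    - copy (bool, optional): If `True`, returns a copy of the `SpatialData` object with the new table added. If `False`, modifies the original `SpatialData` object in place. Default is `False`.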
 
     Returns:
-    - SpatialData | None
-        If `copy` is `True`, returns a copy of the modified `SpatialData` object. Otherwise, returns `None`.
-
-    Raises:
-    ValueError
-        - If the specified points layer does not exist in `sdata.points`.
-        - If the `gene_key` column is not present in the specified points layer.
-
+    - SpatialData | None: If `copy` is `True`, returns a copy of the modified `SpatialData` object. Otherwise, returns `None`.
     """
     # Check if the specified points layer exists
     if points_layer not in sdata.points:
@@ -270,26 +249,17 @@ def spatial_colocalization(
     Computes spatial variability of extracellular RNA using Moran's I.
 
     Parameters:
-    - sdata : SpatialData
-        The spatial transcriptomics dataset in SpatialData format.
-    - coords_keys : list of str, optional
-        The keys for spatial coordinates in the dataset (default: ['x', 'y']).
-    - gene_id_key : str, optional
-        The key for gene identifiers in the dataset (default: 'feature_name').
-    - n_neighbors : int, optional
-        Number of neighbors to use for computing spatial neighbors (default: 10).
-    - resolution : int, optional
-        The resolution for kernel density estimation (default: 1000).
-    - binsize : int, optional
-        The binsize for kernel density estimation (default: 20).
-    - n_threads : int, optional
-        The number of threads for LazyKDE processing (default: 1).
-    - spatial_autocorr_mode : str, optional
-        The mode for spatial autocorrelation computation (default: "moran").
+    - sdata (SpatialData): The spatial transcriptomics dataset in SpatialData format.
+    - coords_keys (list of str, optional): The keys for spatial coordinates in the dataset (default: ['x', 'y']).
+    - gene_id_key (str, optional): The key for gene identifiers in the dataset (default: 'feature_name').
+    - n_neighbors (int, optional): Number of neighbors to use for computing spatial neighbors (default: 10).
+    - resolution (int, optional): The resolution for kernel density estimation (default: 1000).
+    - binsize (int, optional): The binsize for kernel density estimation (default: 20).
+    - n_threads (int, optional): The number of threads for LazyKDE processing (default: 1).
+    - spatial_autocorr_mode (str, optional): The mode for spatial autocorrelation computation (default: "moran").
 
     Returns:
-    - pd.DataFrame
-        A DataFrame containing Moran's I values for each gene, indexed by gene names.
+    - sdata (SpatialData): A DataFrame containing Moran's I values for each gene, indexed by gene names.
     """
     # Step 1: Extract and preprocess data
     data = sdata.points['transcripts'][coords_keys + ['extracellular', gene_id_key]].compute()

diff --git a/src/troutpy/tl/segmentation_free.py b/src/troutpy/tl/segmentation_free.py
@@ -18,31 +18,22 @@ def segmentation_free_clustering(
     This function clusters transcriptomic data without relying on pre-defined cell or tissue segmentations. It supports multiple clustering methods, with Points2Regions being the default.
 
     Parameters:
-    - sdata : SpatialData
-        A spatial data object containing transcriptomic information.
-    - params : dict, optional (default: {})
-        A dictionary of parameters for the selected clustering method.For `points2regions`:
-            - 'num_clusters' (int): Number of clusters (default: 300).
-            - 'pixel_width' (float): Pixel width parameter (default: 0.4).
-            - 'pixel_smoothing' (float): Pixel smoothing parameter (default: 3.5).
-    - x : str, optional (default: 'x')
-        Column name for the x-coordinates of transcripts.
-    - y : str, optional (default: 'y')
-        Column name for the y-coordinates of transcripts.
-    - feature_name : str, optional (default: 'feature_name')
-        Column name for the feature names.
-    - method : str, optional (default: 'points2regions')
-        Clustering method to use. Options:
+    - sdata (SpatialData): A spatial data object containing transcriptomic information.
+    - params (dict): A dictionary of parameters for the selected clustering method. For `points2regions`:
+        - 'num_clusters' (int): Number of clusters (default: 300).
+        - 'pixel_width' (float): Pixel width parameter (default: 0.4).
+        - 'pixel_smoothing' (float): Pixel smoothing parameter (default: 3.5).
+    - x (str): Column name for the x-coordinates of transcripts.
+    - y (str): Column name for the y-coordinates of transcripts.
+    - feature_name (str): Column name for the feature names.
+    - method (str, optional): Clustering method to use. Options:
         - 'points2regions': Uses the Points2Regions algorithm for clustering.
         - 'sainsc': Placeholder for another clustering method.
-    - transcript_id : str, optional (default: 'transcript_id')
-        Column name for the transcript IDs.
-    - copy : bool, optional (default: False)
-        If True, returns a copy of the clustering results. If False, updates `sdata` in-place.
+    - transcript_id (str, optional): Column name for the transcript IDs.
+    - copy (bool): If True, returns a copy of the clustering results. If False, updates `sdata` in-place.
 
     Returns:
-    Optional[anndata.AnnData]:
-        If `copy` is True, returns an AnnData object containing the clustering results.Otherwise, updates the `sdata` object in-place and returns None.
+    Optional[anndata.AnnData]: If `copy` is True, returns an AnnData object containing the clustering results. Otherwise, updates the `sdata` object in-place and returns None.
     """
     # Reset transcript indexing if not unique
     sdata.points['transcripts'] = sdata.points['transcripts'].reset_index(drop=True)