making the release

cantinilab · Jan 11, 2024 · 709aeec · 709aeec
1 parent 1059448
commit 709aeec
Show file tree

Hide file tree

Showing 8 changed files with 103 additions and 37 deletions.
diff --git a/README.md b/README.md
@@ -3,7 +3,7 @@
 [![codecov](https://codecov.io/gh/cantinilab/GRnnData/branch/main/graph/badge.svg?token=GRnnData_token_here)](https://codecov.io/gh/cantinilab/GRnnData)
 [![CI](https://github.com/cantinilab/GRnnData/actions/workflows/main.yml/badge.svg)](https://github.com/cantinilab/GRnnData/actions/workflows/main.yml)
 
-Awesome gene regulatory network enhanced anndata created by jkobject & remi trimbour
+Awesome gene regulatory network enhanced anndata created by @jkobject & remi trimbour
 
 grnndata works similarly to anndata. The goal was to use the .varm of anndata to store the GRN data associated with a dataset and have a formal way to work with GRNs.
 

diff --git a/docs/grnanndata.md b/docs/grnanndata.md
@@ -0,0 +1,4 @@
+# Documentation for `GRNAnnData` module
+
+::: grnndata.GRNAnnData
+    handler: python
diff --git a/docs/utils.md b/docs/utils.md
@@ -0,0 +1,4 @@
+# Documentation for `utils` module
+
+::: grnndata.utils
+    handler: python
diff --git a/grnndata/GRNAnnData.py b/grnndata/GRNAnnData.py
@@ -42,32 +42,49 @@ def __getitem__(self, *args):
 
     @property
     def grn(self):
+        """
+        Property that returns the gene regulatory network (GRN) as a pandas DataFrame.
+        The index and columns of the DataFrame are the gene names stored in 'var_names'.
+
+        Returns:
+            pd.DataFrame: The GRN as a DataFrame with gene names as index and columns.
+        """
         return pd.DataFrame(
             data=self.varp["GRN"], index=self.var_names, columns=self.var_names
         )
 
     # add return list of genes and corresponding weights
     def extract_links(
-        adata,  # AnnData object
+        self,
         columns=[
-            "row",
-            "col",
+            "regulator",
+            "target",
             "weight",
         ],  # output col names (e.g. 'TF', 'gene', 'score')
     ):
         """
-        little function to extract scores from anndata.varp['key'] as a pd.DataFrame :
+        Extract the link weights stored in self.varp["GRN"] and return them as a pandas DataFrame.
+
+        The resulting DataFrame has the following structure:
             TF   Gene   Score
-            A        B          5
-            C        D         8
+            A    B      5
+            C    D      8
+
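+        Where 'TF' and 'Gene' are gene names (looked up in 'var_names' from the row and column indices of the stored matrix) and 'Score' is the corresponding weight. With the default `columns`, these three columns are named 'regulator', 'target' and 'weight'.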
+
+        Args:
+            columns (list, optional): The names of the columns in the resulting DataFrame. Defaults to ['regulator', 'target', 'weight'].
+
+        Returns:
+            pd.DataFrame: The extracted scores as a DataFrame.
         """
         return pd.DataFrame(
             [
                 a
                 for a in zip(
-                    [adata.var_names[i] for i in adata.varp["GRN"].row],
-                    [adata.var_names[i] for i in adata.varp["GRN"].col],
-                    adata.varp["GRN"].data,
+                    [self.var_names[i] for i in self.varp["GRN"].row],
+                    [self.var_names[i] for i in self.varp["GRN"].col],
+                    self.varp["GRN"].data,
                 )
             ],
             columns=columns,

diff --git a/grnndata/base.py b/grnndata/base.py
diff --git a/grnndata/utils.py b/grnndata/utils.py
@@ -24,7 +24,11 @@ def get_centrality(grn, top_k=30):
     also prints the top K most central nodes in the GRN.
 
     Args:
-        grn (_type_): _description_
+        grn (GRNAnnData): The gene regulatory network to analyze.
+        top_k (int, optional): The number of top results to return. Defaults to 30.
+
+    Returns:
+        (list): A list of the top K most central genes in the GRN (sorted by centrality).
     """
     G = nx.from_numpy_array(grn.varp["GRN"])
     centrality = nx.eigenvector_centrality(G)
@@ -45,13 +49,16 @@ def get_centrality(grn, top_k=30):
 
 def enrichment(grn, of="Targets", doplot=True, top_k=30, **kwargs):
     """
-    enrichment uses the gseapy library to calculate the enrichment of the target genes in the adata
-    the enrichment is returned and plotted
+    This function performs enrichment analysis on a given gene regulatory network (grn).
 
-    Args:
-        grn (_type_): _description_
-        of (str, optional): either ['Targets', 'Regulators', 'Central']. Defaults to "Targets".
-        for_ (str, optional): _description_. Defaults to "TFs".
+    Parameters:
+        grn (GRNAnnData): The gene regulatory network to analyze.
+        of (str, optional): The specific component of the grn to focus on, one of ['Targets', 'Regulators', 'Central']. Defaults to "Targets".
+        top_k (int, optional): The number of top results to return. Defaults to 30.
+        doplot (bool, optional): Whether to generate a plot of the results. Defaults to True.
+
+    Returns:
+        pandas.DataFrame: A DataFrame containing the results of the enrichment analysis.
     """
     if of == "Targets":
         rnk = grn.grn.sum(1).sort_values(ascending=False)
@@ -109,6 +116,19 @@ def enrichment(grn, of="Targets", doplot=True, top_k=30, **kwargs):
 
 
 def similarity(grn, other_grn):
+    """
+    This function calculates the similarity between two gene regulatory networks (grns).
+
+    Parameters:
+        grn (GRNAnnData): The first gene regulatory network.
+        other_grn (GRNAnnData): The second gene regulatory network.
+
+    Returns:
+        dict : A dictionary containing the similarity metrics between the two grns.
+            {"spearman_corr", "precision", "precision_rand", "recall", 
+            "recall_rand", "accuracy", "accuracy_rand", "sim_expr", 
+            "intra_similarity_self", "intra_similarity_other"}
+    """
     # similarity in expression
     selfX = grn.X
     selfXrand = selfX.copy()
@@ -181,3 +201,6 @@ def similarity(grn, other_grn):
         "intra_similarity_self": intra_similarity_self,
         "intra_similarity_other": intra_similarity_other,
     }
+
+def scalefreeness():
+
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -1,2 +1,31 @@
 site_name: grnndata
-theme: readthedocs
+theme: 
+  name: readthedocs
+  # analytics:
+    # gtag: G-ABC123
+site_url: https://www.jkobject.com/grnndata/
+nav:
+  - Home: index.md
+  - example notebook: notebooks/example.ipynb
+  - documentation:
+    - utils: utils.md
+    - GRNAnnData: grnanndata.md
+plugins: 
+  - search
+  - mkdocstrings:
+      handlers:
+        python:
+          paths: [src]
+          options:
+            show_root_heading: true
+            show_source: true
+            summary: true
+            merge_init_into_class: true
+            show_signature: false
+            do_heading: true
+      default_handler: python
+  - git-revision-date-localized
+  - git-authors
+  - mkdocs-jupyter:
+      include_source: True
+      include_requirejs: true
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,14 +1,20 @@
 [tool.poetry]
 name = "grnndata"
 version = "0.1.0"
-description = ""
+description = "Awesome gene regulatory network enhanced anndata"
 authors = ["jkobject"]
-readme = "README.md"
+readme = ["README.md", "LICENSE"]
+license = "GPL3"
+repository = "https://github.com/cantinilab/GRnnData"
+keywords = ["scRNAseq", "gene regulatory networks", "anndata", "scPrint"]
 
 [tool.poetry.dependencies]
 python = "^3.10"
 anndata = "*"
 scipy = "*"
+networkx = "*"
+gseapy = "1.*"
+numpy = "*"
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^7.4.4"