aristoteleo · AlexanderCaichen · Oct 27, 2023 · Oct 27, 2023 · Oct 30, 2023 · Oct 30, 2023
diff --git a/dynamo/preprocessing/Preprocessor.py b/dynamo/preprocessing/Preprocessor.py
@@ -71,6 +71,7 @@ def __init__(
         regress_out_kwargs: Dict[List[str], Any] = {},
         cell_cycle_score_enable: bool = False,
         cell_cycle_score_kwargs: Dict[str, Any] = {},
+        normalized: bool = False
     ) -> None:
         """Preprocessor constructor.
 
@@ -106,6 +107,7 @@ def __init__(
             force_gene_list: use this gene list as selected genes across all the recipe pipeline. Defaults to None.
             sctransform_kwargs: arguments passed into sctransform function. Defaults to {}.
             regress_out_kwargs: arguments passed into regress_out function. Defaults to {}.
+            normalized: whether the input data is already normalized; if True, the gene-wise, cell-wise and norm_method normalization steps are skipped. Defaults to False.
         """
 
         self.basic_stats = basic_stats
@@ -125,6 +127,7 @@ def __init__(
         self.regress_out = regress_out_parallel
         self.pca = pca_function
         self.pca_kwargs = pca_kwargs
+        self.skip_normalize = normalized
 
         # self.n_top_genes = n_top_genes
         self.convert_gene_name = convert_gene_name_function
@@ -359,7 +362,9 @@ def _normalize_selected_genes(self, adata: AnnData) -> None:
             adata: an AnnData object.
         """
 
-        if callable(self.normalize_selected_genes):
+        if self.skip_normalize:
+            main_info("Data already normalized. Skipping gene-wise normalization.")
+        elif callable(self.normalize_selected_genes):
             main_debug("normalizing selected genes...")
             self.normalize_selected_genes(adata, **self.normalize_selected_genes_kwargs)
 
@@ -370,7 +375,9 @@ def _normalize_by_cells(self, adata: AnnData) -> None:
             adata: an AnnData object.
         """
 
-        if callable(self.normalize_by_cells):
+        if self.skip_normalize:
+            main_info("Data already normalized. Skipping cell-wise normalization.")
+        elif callable(self.normalize_by_cells):
             main_debug("applying normalize by cells function...")
             self.normalize_by_cells(adata, **self.normalize_by_cells_function_kwargs)
 
@@ -381,7 +388,9 @@ def _norm_method(self, adata: AnnData) -> None:
             adata: an AnnData object.
         """
 
-        if callable(self.norm_method):
+        if self.skip_normalize:
+            main_info("Data already normalized. Skipping normalization.")
+        elif callable(self.norm_method):
             main_debug("applying a normalization method transformation on expression matrix data...")
             self.norm_method(adata, **self.norm_method_kwargs)
 
@@ -612,6 +621,7 @@ def preprocess_adata_seurat(
             self._regress_out(adata)
 
         self._pca(adata)
+
         temp_logger.finish_progress(progress_name="Preprocessor-seurat")
 
     def config_sctransform_recipe(self, adata: AnnData) -> None:
@@ -679,6 +689,7 @@ def preprocess_adata_sctransform(
         self._normalize_by_cells(adata)
         if len(self.regress_out_kwargs["obs_keys"]) > 0:
             self._regress_out(adata)
+
         self._pca(adata)
 
         temp_logger.finish_progress(progress_name="Preprocessor-sctransform")
@@ -795,7 +806,8 @@ def preprocess_adata_monocle_pearson_residuals(
         if len(self.regress_out_kwargs["obs_keys"]) > 0:
             self._regress_out(adata)
 
-        self.pca(adata, **self.pca_kwargs)
+        self._pca(adata)
+
         temp_logger.finish_progress(progress_name="Preprocessor-monocle-pearson-residual")
 
     def preprocess_adata(