Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Scvelo/Dynamo conversion function #551

Merged
merged 3 commits into from
Oct 9, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 137 additions & 0 deletions dynamo/tools/velocyto_scvelo.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ..dynamo_logger import main_info
from scipy.sparse import csr_matrix

from ..configuration import DKM
Expand Down Expand Up @@ -212,6 +213,142 @@ def colormap_fun(x: np.ndarray) -> np.ndarray:
return data_out


def scv_dyn_convertor(adata: anndata, mode: Literal["to_dyn", "to_scv"] = "to_dyn"):
"""Convert the adata object used in Scvelo to the adata object used by Dynamo, or vice versa.

The use case of this method includes but not limited to:
- Preprocess AnnData with Scvelo then estimate velocity with Dynamo.
- Preprocess AnnData with Dynamo then estimate velocity with Scvelo.
- Apply Dynamo analyses and visualization to the velocity estimated from Scvelo.
- Apply Scvelo analyses and visualization to the velocity estimated from Dynamo.
Conversion may need manual adjustments based on the use case because of the difference of velocity estimation method.
For example, all Dynamo anndata objects will be set to the conventional experiment with the stochastic model. The
`pp` information needs modification to enable methods not supported by Scvelo including twostep/direct kinetics,
one-shot method, degradation method. They can be specified by setting `adata.uns["pp"]["experiment_type"]`,
`adata.uns["pp"]["model"]`, `adata.uns["pp"]["est_method"]`.

Args:
adata: the adata object to be converted.
mode: the string indicates the mode. Mode `to_dyn` will convert Scvelo anndata object to Dynamo anndata object.
mode `to_scv` will convert Dynamo anndata to Scvelo anndata.

Returns:
The adata object after conversion.
"""
main_info("Dynamo and scvelo have different preprocessing procedures and velocity estimation methods. "
"The conversion of adata may not be optimal for every use case, requiring potential manual adjustments.")
if mode == "to_dyn":
main_info("Start converting Scvelo adata into Dynamo adata...")
main_info("Scvelo data wil be converted into Dynamo adata with the conventional assumption and the"
"stochastic model. If this is not what you want, please change them manually.")
if "highly_variable_genes" in adata.var.columns:
adata.var["pass_basic_filter"] = adata.var.pop("highly_variable_genes")
adata.var["pass_basic_filter"] = [True if item == 'True' else False for item in
adata.var["pass_basic_filter"]]
Xiaojieqiu marked this conversation as resolved.
Show resolved Hide resolved
if "spliced" in adata.layers.keys():
adata.layers["X_spliced"] = adata.layers.pop("spliced")
if "unspliced" in adata.layers.keys():
adata.layers["X_unspliced"] = adata.layers.pop("unspliced")
if "highly_variable" in adata.var.columns:
adata.var["use_for_pca"] = adata.var.pop("highly_variable")
if "recover_dynamics" in adata.uns.keys():
adata.uns.pop("recover_dynamics")
adata.uns["dynamics"] = {
"filter_gene_mode": None,
"t": None,
"group": None,
"X_data": None,
"X_fit_data": None,
"asspt_mRNA": "ss",
"experiment_type": "conventional",
"normalized": True,
"model": "stochastic",
"est_method": "gmm",
"has_splicing": True,
"has_labeling": False,
"splicing_labeling": False,
"has_protein": False,
"use_smoothed": True,
"NTR_vel": False,
"log_unnormalized": True,
"fraction_for_deg": False,
}
adata.uns["pp"] = {
"has_splicing": True,
"has_labeling": False,
"splicing_labeling": False,
"has_protein": False,
"tkey": None,
"experiment_type": "conventional",
"X_norm_method": None,
"layers_norm_method": None,
}
if "Ms" in adata.layers.keys():
adata.layers["M_s"] = adata.layers.pop("Ms")
if "Mu" in adata.layers.keys():
adata.layers["M_u"] = adata.layers.pop("Mu")
if "velocity_u" in adata.layers.keys():
adata.layers["velocity_U"] = adata.layers.pop("velocity_u")
if "velocity" in adata.layers.keys():
adata.layers["velocity_S"] = adata.layers.pop("velocity")
if "fit_alpha" in adata.var.columns:
adata.var["alpha"] = adata.var.pop("fit_alpha")
if "fit_beta" in adata.var.columns:
adata.var["beta"] = adata.var.pop("fit_beta")
if "fit_gamma" in adata.var.columns:
adata.var["gamma"] = adata.var.pop("fit_gamma")
if "fit_r2" in adata.var.columns:
adata.var["gamma_r2"] = adata.var.pop("fit_r2")
if "fit_u0" in adata.var.columns:
adata.var["u0"] = adata.var.pop("fit_u0")
if "fit_s0" in adata.var.columns:
Comment on lines +293 to +304
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add these to varm?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will do that after we merge the "Save velocity parameters to .varm instead .var" branch. Then we can utilize the helper function like get_vel_params() or update_vel_params().

adata.var["s0"] = adata.var.pop("fit_s0")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also this one

elif mode == "to_scv":
main_info("Start converting Dynamo adata into Scvelo adata...")
if "pass_basic_filter" in adata.var.columns:
adata.var["highly_variable_genes"] = adata.var.pop("pass_basic_filter")
adata.var["highly_variable_genes"] = ["True" if item else "False" for item in
adata.var["highly_variable_genes"]]
if "X_spliced" in adata.layers.keys():
adata.layers["spliced"] = adata.layers.pop("X_spliced")
if "X_unspliced" in adata.layers.keys():
adata.layers["unspliced"] = adata.layers.pop("X_unspliced")
if "use_for_pca" in adata.var.columns:
adata.var["highly_variable"] = adata.var.pop("use_for_pca")
if "pp" in adata.uns.keys():
adata.uns.pop("pp")
if "dynamics" in adata.uns.keys():
adata.uns.pop("dynamics")
adata.uns["recover_dynamics"] = {
"fit_connected_states": True,
"fit_basal_transcription": None,
"use_raw": False,
}
if "M_s" in adata.layers.keys():
adata.layers["Ms"] = adata.layers.pop("M_s")
if "M_u" in adata.layers.keys():
adata.layers["Mu"] = adata.layers.pop("M_u")
if "velocity_U" in adata.layers.keys():
adata.layers["velocity_u"] = adata.layers.pop("velocity_U")
if "velocity_S" in adata.layers.keys():
adata.layers["velocity"] = adata.layers.pop("velocity_S")
if "alpha" in adata.var.columns:
adata.var["fit_alpha"] = adata.var.pop("alpha")
if "beta" in adata.var.columns:
adata.var["fit_beta"] = adata.var.pop("beta")
if "gamma" in adata.var.columns:
adata.var["fit_gamma"] = adata.var.pop("gamma")
if "gamma_r2" in adata.var.columns:
adata.var["fit_r2"] = adata.var.pop("gamma_r2")
if "u0" in adata.var.columns:
adata.var["fit_u0"] = adata.var.pop("u0")
if "s0" in adata.var.columns:
adata.var["fit_s0"] = adata.var.pop("s0")
else:
raise NotImplementedError(f"Mode: {mode} is not implemented.")
return adata


def run_velocyto(adata: anndata.AnnData) -> anndata.AnnData:
"""Run velocyto over the AnnData object.

Expand Down
Loading