Skip to content

Commit

Permalink
chore: Update resource data URLs from dropbox to figshare
Browse files Browse the repository at this point in the history
  • Loading branch information
chansigit committed Jul 5, 2024
1 parent d2b8c53 commit facac04
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 20 deletions.
8 changes: 4 additions & 4 deletions dynamo/external/scifate.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ def scifate_glmnet(
cell_filter_UMI: int = 10000,
core_n_lasso: int = 1,
core_n_filtering: int = 1,
motif_ref: str = "https://www.dropbox.com/s/s8em539ojl55kgf/motifAnnotations_hgnc.csv?dl=1",
TF_link_ENCODE_ref: str = "https://www.dropbox.com/s/bjuope41pte7mf4/df_gene_TF_link_ENCODE.csv?dl=1",
motif_ref: str = "https://figshare.com/ndownloader/files/47439455",
TF_link_ENCODE_ref: str = "https://figshare.com/ndownloader/files/47439458",
nt_layers: list = ["X_new", "X_total"],
) -> AnnData:
"""Perform scifate analysis using glmnet.
Expand Down Expand Up @@ -58,11 +58,11 @@ def scifate_glmnet(
motif_ref: The path to the TF binding motif data as described above. It provides the list of TFs gene names and
is used to process adata object to generate the TF expression and target new expression matrix for glmnet
based TF-target synthesis rate linkage analysis. But currently it is not used for motif based filtering.
By default, it is a dropbox link that store the data from us. Other motif reference can bed downloaded from
By default, it is a cloud link that stores the data from us. Other motif references can be downloaded from
RcisTarget: https://resources.aertslab.org/cistarget/. For human motif matrix, it can be downloaded from
June's shared folder:
https://shendure-web.gs.washington.edu/content/members/cao1025/public/nobackup/sci_fate/data/hg19-tss-centered-10kb-7species.mc9nr.feather
TF_link_ENCODE_ref: The path to the TF chip-seq data. By default, it is a dropbox link from us that stores the
TF_link_ENCODE_ref: The path to the TF chip-seq data. By default, it is a cloud link from us that stores the
data. Other data can be downloaded from:
https://amp.pharm.mssm.edu/Harmonizome/dataset/ENCODE+Transcription+Factor+Targets.
nt_layers: The layers that will be used for the network inference. Note that the layers can be changed flexibly.
Expand Down
8 changes: 4 additions & 4 deletions dynamo/external/scribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ def scribe(
Targets: Union[list, None] = None,
gene_filter_rate: float = 0.1,
cell_filter_UMI: int = 10000,
motif_ref: str = "https://www.dropbox.com/s/s8em539ojl55kgf/motifAnnotations_hgnc.csv?dl=1",
motif_ref: str = "https://figshare.com/ndownloader/files/47439455",
nt_layers: list = ["X_new", "X_total"],
normalize: bool = False,
do_CLR: bool = True,
drop_zero_cells: bool = True,
TF_link_ENCODE_ref: str = "https://www.dropbox.com/s/bjuope41pte7mf4/df_gene_TF_link_ENCODE.csv?dl=1",
TF_link_ENCODE_ref: str = "https://figshare.com/ndownloader/files/47439458",
) -> AnnData:
"""Apply Scribe to calculate causal network from spliced/unspliced, metabolic labeling based and other "real" time
series datasets.
Expand All @@ -49,7 +49,7 @@ def scribe(
cell_filter_UMI: Minimum number of UMIs for cell filtering.
motif_ref: It provides the list of TFs gene names and is used to parse the data to get the list of TFs and
Targets for the causal network inference from those TFs to Targets. But currently the motif based filtering
is not implemented. By default, it is a dropbox link that store the data from us. Other motif reference can
is not implemented. By default, it is a cloud link that stores the data from us. Other motif references can
be downloaded from RcisTarget: https://resources.aertslab.org/cistarget/. For human motif matrix, it can be
downloaded from June's shared folder:
https://shendure-web.gs.washington.edu/content/members/cao1025/public/nobackup/sci_fate/data/hg19-tss-
Expand All @@ -64,7 +64,7 @@ def scribe(
target. This can signify the relationship between potential regulators and targets, speed up the calculation,
but at the risk of ignoring strong inhibition effects from certain regulators to targets.
do_CLR: Whether to perform context likelihood relatedness analysis on the reconstructed causal network
TF_link_ENCODE_ref: The path to the TF chip-seq data. By default, it is a dropbox link from us that stores the
TF_link_ENCODE_ref: The path to the TF chip-seq data. By default, it is a cloud link from us that stores the
data. Other data can be downloaded from:
https://amp.pharm.mssm.edu/Harmonizome/dataset/ENCODE+Transcription+Factor+Targets.
Expand Down
1 change: 1 addition & 0 deletions dynamo/preprocessing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,7 @@ def relative2abs(
"""

if ERCC_annotation is None:
#TODO: outdated link. consider replacing or removing it.
ERCC_annotation = pd.read_csv(
"https://www.dropbox.com/s/cmiuthdw5tt76o5/ERCC_specification.txt?dl=1",
sep="\t",
Expand Down
29 changes: 17 additions & 12 deletions dynamo/sample_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,27 +65,32 @@ def get_adata(url: str, filename: Optional[str] = None) -> Optional[AnnData]:

# add our toy sample data
def Gillespie():
#TODO: add data here
pass


def HL60():
#TODO: add data here
pass


def NASCseq():
#TODO: add data here
pass


def scSLAMseq():
#TODO: add data here
pass


def scifate():
#TODO: add data here
pass


def scNT_seq_neuron_splicing(
url: str = "https://www.dropbox.com/s/g1afqdcsczgyj2m/neuron_splicing_4_11.h5ad?dl=1",
url: str = "https://figshare.com/ndownloader/files/47439605",
filename: str = "neuron_splicing.h5ad",
) -> AnnData:
"""The neuron splicing data is from Qiu, et al (2020).
Expand All @@ -98,7 +103,7 @@ def scNT_seq_neuron_splicing(


def scNT_seq_neuron_labeling(
url: str = "https://www.dropbox.com/s/lk9cl63yd28mfuq/neuron_labeling.h5ad?dl=1",
url: str = "https://figshare.com/ndownloader/files/47439629",
filename: str = "neuron_labeling.h5ad",
) -> AnnData:
"""The neuron labeling data is from Qiu, et al (2020).
Expand All @@ -115,7 +120,7 @@ def cite_seq():


def zebrafish(
url: str = "https://www.dropbox.com/scl/fi/3zt89ee0j5twxk4ttzmij/zebrafish.h5ad?rlkey=phwg0b7aqiizd9kf69l2kciak&dl=1",
url: str = "https://figshare.com/ndownloader/files/47420257",
filename: str = "zebrafish.h5ad",
) -> AnnData:
"""The zebrafish is from Saunders, et al (2019).
Expand Down Expand Up @@ -180,7 +185,7 @@ def hgForebrainGlutamatergic(


def chromaffin(
url: str = "https://www.dropbox.com/s/awevuz836tlclvw/onefilepercell_A1_unique_and_others_J2CH1.loom?dl=1",
url: str = "https://figshare.com/ndownloader/files/47439620",
filename: str = "onefilepercell_A1_unique_and_others_J2CH1.loom",
) -> AnnData: #
"""The chromaffin dataset used in http://pklab.med.harvard.edu/velocyto/notebooks/R/chromaffin2.nb.html
Expand Down Expand Up @@ -224,7 +229,7 @@ def pancreatic_endocrinogenesis(


def DentateGyrus_scvelo(
url: str = "https://www.dropbox.com/s/3w1wzb0b68fhdsw/dentategyrus_scv.h5ad?dl=1",
url: str = "https://figshare.com/ndownloader/files/47439623",
filename: str = "dentategyrus_scv.h5ad",
) -> AnnData:
"""The Dentate Gyrus dataset used in https://github.com/theislab/scvelo_notebooks/tree/master/data/DentateGyrus.
Expand All @@ -238,10 +243,10 @@ def DentateGyrus_scvelo(


def scEU_seq_rpe1(
url: str = "https://www.dropbox.com/s/25enev458c8egn7/rpe1.h5ad?dl=1",
url: str = "https://figshare.com/ndownloader/files/47439641",
filename: str = "rpe1.h5ad",
):
"""Download rpe1 dataset from Battich, et al (2020) via Dropbox link.
"""Download rpe1 dataset from Battich, et al (2020) via a cloud link.
This data consists of 13,913 genes across 2,930 cells.
"""
Expand All @@ -251,10 +256,10 @@ def scEU_seq_rpe1(


def scEU_seq_organoid(
url: str = "https://www.dropbox.com/s/es7sroy5ceb7wwz/organoid.h5ad?dl=1",
url: str = "https://figshare.com/ndownloader/files/47439632",
filename: str = "organoid.h5ad",
):
"""Download organoid dataset from Battich, et al (2020) via Dropbox link.
"""Download organoid dataset from Battich, et al (2020) via a cloud link.
This data consists of 9,157 genes across 3,831 cells.
"""
Expand All @@ -264,7 +269,7 @@ def scEU_seq_organoid(


def hematopoiesis(
url: str = "https://www.dropbox.com/s/n9mx9trv1h78q0r/hematopoiesis_v1.h5ad?dl=1",
url: str = "https://figshare.com/ndownloader/files/47439635",
# url: str = "https://pitt.box.com/shared/static/kyh3s4wrxdywupn9wk9r2j27vzlvk8vf.h5ad", # with box
# url: str = "https://pitt.box.com/shared/static/efqa8icu1m6d1ghfcc3s9tj0j91pky1h.h5ad", # v0: umap_ori version
filename: str = "hematopoiesis.h5ad",
Expand All @@ -276,7 +281,7 @@ def hematopoiesis(


def hematopoiesis_raw(
url: str = "https://www.dropbox.com/s/rvkxvq8694xnxz3/hsc_raw_with_metadata.h5ad?dl=1",
url: str = "https://figshare.com/ndownloader/files/47439626",
# url: str = "https://pitt.box.com/shared/static/bv7q0kgxjncc5uoget5wvmi700xwntje.h5ad", # with box
filename: str = "hematopoiesis_raw.h5ad",
) -> AnnData:
Expand All @@ -287,7 +292,7 @@ def hematopoiesis_raw(


def human_tfs(
url: str = "https://www.dropbox.com/scl/fi/pyocgrhvglg6p7q8yf9ol/human_tfs.txt?rlkey=kbc8vfzf72f8ez0xldrb5nb2d&dl=1",
url: str = "https://figshare.com/ndownloader/files/47439617",
filename: str = "human_tfs.txt",
) -> pd.DataFrame:
"""Download human transcription factors."""
Expand Down

0 comments on commit facac04

Please sign in to comment.