From 4287e6664a3708f7144a147a2d1919b482210c55 Mon Sep 17 00:00:00 2001 From: Jinyang Zhang Date: Wed, 8 Nov 2023 14:07:33 +0800 Subject: [PATCH] add support for CIRI3 --- .gitignore | 2 +- CIRIquant/pipeline.py | 2 +- CIRIquant/replicate.py | 14 ++++++++------ libs/CIRI_DE.R | 34 ++++++++++++++++------------------ requirements.txt | 2 +- setup.py | 2 +- 6 files changed, 28 insertions(+), 28 deletions(-) diff --git a/.gitignore b/.gitignore index ba654a1..ddde9a2 100644 --- a/.gitignore +++ b/.gitignore @@ -105,5 +105,5 @@ ENV/ .vscode/ # pycharm -.DS_Store/ +.DS_Store .idea/ diff --git a/CIRIquant/pipeline.py b/CIRIquant/pipeline.py index a9cb947..e7729a3 100644 --- a/CIRIquant/pipeline.py +++ b/CIRIquant/pipeline.py @@ -145,7 +145,7 @@ def clean_tmp(outdir, prefix): tmp_files = [ '{}/circ/{}_unmapped.sam'.format(outdir, prefix), '{}/circ/{}_denovo.bam'.format(outdir, prefix), - '{}/align/{}.bam'.format(outdir, prefix), + # '{}/align/{}.bam'.format(outdir, prefix), ] for f in tmp_files: if os.path.exists(f) and os.path.isfile(f): diff --git a/CIRIquant/replicate.py b/CIRIquant/replicate.py index 6cb4ab0..5642ff5 100644 --- a/CIRIquant/replicate.py +++ b/CIRIquant/replicate.py @@ -16,7 +16,7 @@ def main(): from utils import check_file # Init argparser - parser = argparse.ArgumentParser(prog="prep_CIRIquant") + parser = argparse.ArgumentParser(prog="CIRIquant_DE_replicate") parser.add_argument('--lib', dest='lib', metavar='FILE', required=True, help='library information') @@ -25,28 +25,30 @@ def main(): parser.add_argument('--gene', dest='gene', metavar='FILE', required=True, help='gene expression matrix', ) parser.add_argument('--out', dest='out', metavar='FILE', required=True, - help='output result of differential expression analysis', ) - + help='output result of circRNA differential expression analysis', ) + parser.add_argument('--out2', dest='out2', metavar='FILE', required=True, + help='output result of gene differential expression analysis', ) # Optional parameters args = parser.parse_args() LOGGER = get_logger('CIRI_DE', None, False) lib_path = os.path.dirname(os.path.split(os.path.realpath(__file__))[0]) + '/libs' os.environ['PATH'] = lib_path + ':' + os.environ['PATH'] - os.chmod(lib_path + '/CIRI_DE.R', 0o755) # Load GTF lib_file = check_file(args.lib) bsj_file = check_file(args.bsj) gene_file = check_file(args.gene) out_file = os.path.abspath(args.out) + out_file2 = os.path.abspath(args.out2) LOGGER.info('Library information: {}'.format(lib_file)) LOGGER.info('circRNA expression matrix: {}'.format(bsj_file)) LOGGER.info('gene expression matrix: {}'.format(gene_file)) - LOGGER.info('Output DE results: {}'.format(out_file)) + LOGGER.info('Output circ DE results: {}'.format(out_file)) + LOGGER.info('Output gene DE results: {}'.format(out_file2)) - de_cmd = 'CIRI_DE.R --lib={} --bsj={} --gene={} --out={}'.format(lib_file, bsj_file, gene_file, out_file) + de_cmd = 'Rscript {}/CIRI_DE.R --lib={} --bsj={} --gene={} --out={} --out2={}'.format(lib_path, lib_file, bsj_file, gene_file, out_file, out_file2) subprocess.call(de_cmd, shell=True) LOGGER.info('Finished!') diff --git a/libs/CIRI_DE.R b/libs/CIRI_DE.R index 3cc8b0b..21bcd1f 100644 --- a/libs/CIRI_DE.R +++ b/libs/CIRI_DE.R @@ -14,7 +14,9 @@ parser <- add_option(parser, c("--bsj"), action="store", default=NA, type='chara parser <- add_option(parser, c("--gene"), action="store", default=NA, type='character', help="gene count matrix") parser <- add_option(parser, c("--out"), action="store", default=NA, type='character', - help="output file") + help="circ DE output file") +parser <- add_option(parser, c("--out2"), action="store", default=NA, type='character', + help="gene DE output file") # opt <- parse_args(parser, args = c("--lib=AD_lib.csv", # "--bsj=AD_bsj.csv", @@ -23,8 +25,8 @@ parser <- add_option(parser, c("--out"), action="store", default=NA, type='chara opt <- parse_args(parser) # main point of program is here, do this whether or not "verbose" is set -if (is.na(opt$lib) || is.na(opt$bsj) || is.na(opt$gene) || is.na(opt$out)) { - cat("Please specify --lib/--bsj/--gene/--out, refer to the manual for detailed instruction!\n", +if (is.na(opt$lib) || is.na(opt$bsj) || is.na(opt$gene) || is.na(opt$out) || is.na(opt$out2) ) { + cat("Please specify --lib/--bsj/--gene/--out/--out2, refer to the manual for detailed instruction!\n", file=stderr()) quit() } @@ -40,7 +42,6 @@ gene_idx <- filterByExpr(gene_DGE) gene_DGE <- gene_DGE[gene_idx, keep.lib.sizes=FALSE] gene_DGE <- calcNormFactors(gene_DGE) - if ("Subject" %in% colnames(lib_mtx)) { subject <- factor(lib_mtx$Subject) treat <- factor(lib_mtx$Group, levels=c("C", "T")) @@ -51,26 +52,12 @@ if ("Subject" %in% colnames(lib_mtx)) { design <- model.matrix(~treat) } -# design <- model.matrix(~factor(lib_mtx$Group)) -# gene_DGE <- estimateDisp(gene_DGE, design, robust = TRUE) -# gene_fit <- glmFit(gene_DGE, design) -# gene_lrt <- glmLRT(gene_fit) -# -# gene_df <- gene_lrt$table -# gene_order <- order(gene_lrt$table$PValue) -# gene_df$DE <- decideTestsDGE(gene_lrt) -# gene_df <- gene_df[gene_order, ] -# gene_df$FDR <- p.adjust(gene_df$PValue, method="fdr") - circ_DGE <- DGEList(counts = bsj_mtx, group = lib_mtx$Group, lib.size = gene_DGE$samples[, "lib.size"], norm.factors = gene_DGE$samples[, "norm.factors"]) - # circ_idx <- filterByExpr(circ_DGE, min.count=) # circ_DGE <- circ_DGE[circ_idx, , keep.lib.sizes=TRUE] -# head(circ_df[rowSums(bsj_mtx >= 2) >= nrow(lib_mtx) / 2,]) - circ_DGE <- estimateDisp(circ_DGE, design, robust = TRUE) circ_fit <- glmFit(circ_DGE, design) circ_lrt <- glmLRT(circ_fit) @@ -81,3 +68,14 @@ circ_df$DE <- as.vector(decideTestsDGE(circ_lrt)) circ_df <- circ_df[circ_order, ] circ_df$FDR <- p.adjust(circ_df$PValue, method="fdr") write.csv(circ_df, file=opt$out, quote = FALSE) + +gene_DGE <- estimateDisp(gene_DGE, design, robust = TRUE) +gene_fit <- glmFit(gene_DGE, design) +gene_lrt <- glmLRT(gene_fit) + +gene_df <- gene_lrt$table +gene_order <- order(gene_lrt$table$PValue) +gene_df$DE <- decideTestsDGE(gene_lrt) +gene_df <- gene_df[gene_order, ] +gene_df$FDR <- p.adjust(gene_df$PValue, method="fdr") +write.csv(gene_df, file=opt$out2, quote = FALSE) diff --git a/requirements.txt b/requirements.txt index 23d4135..2c59392 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ numexpr==2.6.9 -numpy==1.22.0 +numpy==1.16.6 pysam==0.15.2 PyYAML==5.4 scikit-learn==0.20.3 diff --git a/setup.py b/setup.py index eca59de..29121af 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ def read(infile): include_package_data=True, zip_safe=False, install_requires=[ - 'argparse>=1.2.1', 'PyYAML==5.4', 'pysam==0.15.2', 'numpy==1.22.0', + 'argparse>=1.2.1', 'PyYAML==5.4', 'pysam==0.15.2', 'numpy==1.16.6', 'scipy==1.2.2', 'scikit-learn==0.20.3', 'numexpr==2.6.9', ], )