diff --git a/cnvlib/commands.py b/cnvlib/commands.py index f5812b3f..55428c2d 100644 --- a/cnvlib/commands.py +++ b/cnvlib/commands.py @@ -946,6 +946,7 @@ def _cmd_fix(args): args.do_edge, args.do_rmask, args.cluster, + args.smoothing_window_fraction, ) tabio.write(target_table, args.output or tgt_raw.sample_id + ".cnr") @@ -992,6 +993,15 @@ def _cmd_fix(args): ) P_fix.add_argument("-o", "--output", metavar="FILENAME", help="Output file name.") add_diploid_parx_genome(P_fix) +P_fix.add_argument( + "--smoothing-window-fraction", + type=float, + help=( + "If specified, sets the smoothing window fraction for rolling median bias smoothing" + " based on traits. Otherwise, defaults to 1/sqrt(len(data))." + ), + default=None +) P_fix.set_defaults(func=_cmd_fix) diff --git a/cnvlib/fix.py b/cnvlib/fix.py index 06cfe600..93419093 100644 --- a/cnvlib/fix.py +++ b/cnvlib/fix.py @@ -16,16 +16,31 @@ def do_fix( do_edge=True, do_rmask=True, do_cluster=False, + smoothing_window_fraction=None, ): """Combine target and antitarget coverages and correct for biases.""" # Load, recenter and GC-correct target & antitarget probes separately logging.info("Processing target: %s", target_raw.sample_id) cnarr, ref_matched = load_adjust_coverages( - target_raw, reference, True, do_gc, do_edge, False, diploid_parx_genome + target_raw, + reference, + True, + do_gc, + do_edge, + False, + diploid_parx_genome, + smoothing_window_fraction=smoothing_window_fraction, ) logging.info("Processing antitarget: %s", antitarget_raw.sample_id) anti_cnarr, ref_anti = load_adjust_coverages( - antitarget_raw, reference, False, do_gc, False, do_rmask, diploid_parx_genome + antitarget_raw, + reference, + False, + do_gc, + False, + do_rmask, + diploid_parx_genome, + smoothing_window_fraction=smoothing_window_fraction, ) if len(anti_cnarr): # Combine target and antitarget bins @@ -71,7 +86,16 @@ def do_fix( return cnarr -def load_adjust_coverages(cnarr, ref_cnarr, skip_low, fix_gc, fix_edge, fix_rmask, diploid_parx_genome): +def load_adjust_coverages( + cnarr, + ref_cnarr, + skip_low, + fix_gc, + fix_edge, + fix_rmask, + diploid_parx_genome, + smoothing_window_fraction=None +): """Load and filter probe coverages; correct using reference and GC.""" if "gc" in cnarr: # Don't choke on Picard-derived files that have the GC column @@ -101,7 +125,9 @@ def load_adjust_coverages(cnarr, ref_cnarr, skip_low, fix_gc, fix_edge, fix_rmas ) else: # Smoothing window fraction converges on percentile binning, like Picard - frac = max(0.01, len(cnarr) ** -0.5) + frac = smoothing_window_fraction + if frac is None: + frac = max(0.01, len(cnarr) ** -0.5) cnarr_index_reset = False if fix_gc: if "gc" in ref_matched: