Skip to content

Commit

Permalink
add mutation rate
Browse files Browse the repository at this point in the history
  • Loading branch information
pdimens committed May 7, 2024
1 parent dcac0ab commit af014da
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 15 deletions.
33 changes: 20 additions & 13 deletions src/harpy/rules/simulate-linkedreads.smk
Original file line number Diff line number Diff line change
Expand Up @@ -101,14 +101,15 @@ rule create_molecules_hap:
readpairs = int(config["read_pairs"] * 500000),
outerdist = config["outer_distance"],
distsd = config["distance_sd"],
mutationrate = config["mutation_rate"],
prefix = lambda wc: outdir + "/dwgsim_simulated/dwgsim." + wc.get("hap") + ".12"
conda:
f"{envdir}/simulations.yaml"
message:
"Creating reads from {input}"
"Simulating reads reads from {input}"
shell:
"""
dwgsim -N {params.readpairs} -e 0.0001,0.0016 -E 0.0001,0.0016 -d {params.outerdist} -s {params.distsd} -1 135 -2 151 -H -y 0 -S 0 -c 0 -R 0 -r 0 -F 0 -o 1 -m /dev/null {input} {params.prefix} 2> {log}
dwgsim -N {params.readpairs} -e 0.0001,0.0016 -E 0.0001,0.0016 -d {params.outerdist} -s {params.distsd} -1 135 -2 151 -H -y 0 -S 0 -c 0 -R 0 -r {params.mutationrate} -F 0 -o 1 -m /dev/null {input} {params.prefix} 2> {log}
"""

rule interleave_dwgsim_output:
Expand Down Expand Up @@ -213,15 +214,19 @@ rule log_workflow:
input:
collect(outdir + "/sim_hap{hap}_haplotag.R{fw}.fq.gz", hap = [0,1], fw = [1,2])
params:
static = "-o 1 -d 2 -u 4",
proj_dir = f"-r {outdir}",
prefix = f"-p {outdir}/sim",
outdist = f"""-i {config["outer_distance"]}""",
dist_sd = f"""-s {config["distance_sd"]}""",
n_pairs = f"""-x {config["read_pairs"]}""",
mol_len = f"""-f {config["molecule_length"]}""",
parts = f"""-t {config["partitions"]}""",
mols_per = f"""-m {config["molecules_per_partition"]}"""
lrsproj_dir = f"{outdir}",
lrsoutdist = config["outer_distance"],
lrsdist_sd = config["distance_sd"],
lrsn_pairs = config["read_pairs"],
lrsmol_len = config["molecule_length"],
lrsparts = config["partitions"],
lrsmols_per = config["molecules_per_partition"],
lrsstatic = "-o 1 -d 2 -u 4"
dwgreadpairs = int(config["read_pairs"] * 500000),
dwgouterdist = config["outer_distance"],
dwgdistsd = config["distance_sd"],
dwgmutationrate = config["mutation_rate"],
dwgprefix = lambda wc: outdir + "/dwgsim_simulated/dwgsim." + wc.get("hap") + ".12"
message:
"Summarizing the workflow: {output}"
run:
Expand All @@ -230,9 +235,11 @@ rule log_workflow:
_ = f.write(f"Genome haplotype 1: {gen_hap1}\n")
_ = f.write(f"Genome haplotype 2: {gen_hap2}\n")
_ = f.write(f"Barcode file: {barcodefile}\n")
_ = f.write("Reads were simulated from the provided genomes using:\n")
_ = f.write(" " + f"dwgsim -N {params.dwgreadpairs} -e 0.0001,0.0016 -E 0.0001,0.0016 -d {params.dwgouterdist} -s {params.dwgdistsd} -1 135 -2 151 -H -y 0 -S 0 -c 0 -R 0 -r {params.dwgmutationrate} -F 0 -o 1 -m /dev/null GENO PREFIX\n")
_ = f.write("LRSIM was started from step 3 (-u 3) with these parameters:\n")
_ = f.write(" " + f"LRSIMharpy.pl -g genome1,genome2 " + " ".join(params) + "\n")
_ = f.write(" " + f"LRSIMharpy.pl -g genome1,genome2 -p {params.lrsproj_dir}/lrsim/sim -b BARCODES -r {params.lrsproj_dir} -i {params.lrsoutdist} -s {params.lrsdist_sd} -x {params.lrsn_pairs} -f {params.lrsmol_len} -t {params.lrsparts} -m {params.lrsmols_per} -z THREADS {params.lrsstatic}" + "\n")
_ = f.write("10X style barcodes were converted in haplotag BX:Z tags using:\n")
_ = f.write(" " + f"10xtoHaplotag.py")
_ = f.write("\nThe Snakemake workflow was called via command line:\n")
_ = f.write(" " + str(config["workflow_call"]) + "\n")
_ = f.write(" " + str(config["workflow_call"]) + "\n")
6 changes: 4 additions & 2 deletions src/harpy/simulatelinkedreads.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
@click.option('-b', '--barcodes', type = click.Path(exists=True, dir_okay=False), help = "File of linked-read barcodes to add to reads")
@click.option('-n', '--read-pairs', type = click.FloatRange(min = 0.001), default = 600, show_default=True, help = "Number (in millions) of read pairs to simulate")
@click.option('-l', '--molecule-length', type = click.IntRange(min = 2), default = 100, show_default=True, help = "Mean molecule length (kbp)")
@click.option('-r', '--mutation-rate', type = click.FloatRange(min = 0), default=0.001, show_default=True, help = "Random mutation rate for simulating reads")
@click.option('-p', '--partitions', type = click.IntRange(min = 1), default=1500, show_default=True, help = "Number (in thousands) of partitions/beads to generate")
@click.option('-m', '--molecules-per', type = click.IntRange(min = 1), default = 10, show_default=True, help = "Average number of molecules per partition")
@click.option('-t', '--threads', default = 4, show_default = True, type = click.IntRange(min = 1, max_open = True), help = 'Number of threads to use')
Expand All @@ -20,11 +21,11 @@
@click.option('-o', '--output-dir', type = str, default = "Simulate/linkedreads", help = 'Name of output directory')
@click.argument('genome_hap1', required=True, type=click.Path(exists=True, dir_okay=False), nargs=1)
@click.argument('genome_hap2', required=True, type=click.Path(exists=True, dir_okay=False), nargs=1)
def linkedreads(genome_hap1, genome_hap2, output_dir, outer_distance, distance_sd, barcodes, read_pairs, molecule_length, partitions, molecules_per, threads, snakemake, quiet, print_only):
def linkedreads(genome_hap1, genome_hap2, output_dir, outer_distance, mutation_rate, distance_sd, barcodes, read_pairs, molecule_length, partitions, molecules_per, threads, snakemake, quiet, print_only):
"""
Create linked reads from a genome
Two haplotype genomes (fasta or gzipped fasta) need to be provided as inputs at the end of the command. If
Two haplotype genomes (un/compressed fasta) need to be provided as inputs at the end of the command. If
you don't have a diploid genome, you can simulate one with `harpy simulate` as described [in the documentation](https://pdimens.github.io/harpy/modules/simulate/simulate-variants/#simulate-diploid-assembly).
If not providing a text file of `--barcodes`, Harpy will download the `4M-with-alts-february-2016.txt`
Expand Down Expand Up @@ -65,6 +66,7 @@ def linkedreads(genome_hap1, genome_hap2, output_dir, outer_distance, distance_s
config.write(f"outer_distance: {outer_distance}\n")
config.write(f"distance_sd: {distance_sd}\n")
config.write(f"read_pairs: {read_pairs}\n")
config.write(f"mutation_rate: {mutation_rate}\n")
config.write(f"molecule_length: {molecule_length}\n")
config.write(f"partitions: {partitions}\n")
config.write(f"molecules_per_partition: {molecules_per}\n")
Expand Down

0 comments on commit af014da

Please sign in to comment.