diff --git a/CHANGELOG.md b/CHANGELOG.md index 0be19d33..fb8159fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 6. Added a sequence labels table below the HiC contact map [#147](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/147) 7. Added parameter `hic_samtools_ext_args` and set its default value to `-F 3852` [#159](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/159) 8. Added the HiC QC report to the final report so that users don't have to navigate to the results folder [#162](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/162) +9. Added the fastp log to the final report [#163](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/163) ### `Fixed` diff --git a/bin/report_modules/parsers/hic_parser.py b/bin/report_modules/parsers/hic_parser.py index 17231e17..45d52fbf 100644 --- a/bin/report_modules/parsers/hic_parser.py +++ b/bin/report_modules/parsers/hic_parser.py @@ -7,6 +7,50 @@ from report_modules.parsers.parsing_commons import sort_list_of_results +def colorize_fastp_log(log: Path): + section_colors = { + "adapter": "color: blue;", + "before_filtering": "color: goldenrod;", + "after_filtering": "color: green;", + "filtering_result": "color: green;", + "duplication": "color: red;", + "fastp": "color: gray;", + "version": "color: blue;", + } + + patterns = { + "adapter": re.compile(r"Detecting adapter sequence for read\d..."), + "before_filtering": re.compile(r"Read\d before filtering:"), + "after_filtering": re.compile(r"Read\d after filtering:"), + "filtering_result": re.compile(r"Filtering result:"), + "duplication": re.compile(r"Duplication rate:"), + "fastp": re.compile(r"fastp --in"), + "version": re.compile(r"fastp v"), + } + + html_log = "
\n"
+
+    for line in log.read_text().split("\n"):
+        colored_line = line.strip()  # initial value; both branches below reassign it
+        # Test the raw line against each section pattern in dict order; the first match wins
+        for section, pattern in patterns.items():
+            if pattern.search(line):
+                colored_line = (
+                    f"{line.strip()}"
+                )
+                break
+        else:
+            # for/else: runs only when the loop ended without `break`, i.e. no pattern matched
+            colored_line = f"{line.strip()}"
+
+        html_log += f"{colored_line}\n"
+
+    # Close HTML tags
+    html_log += "
" + + return html_log + + def parse_hic_folder(folder_name="hic_outputs"): dir = os.getcwdb().decode() hic_folder_path = Path(f"{dir}/{folder_name}") @@ -44,6 +88,15 @@ def parse_hic_folder(folder_name="hic_outputs"): if re.match(rf"[\S]+\.on\.{tag}_qc_report\.pdf", x.name) ][0] + # Get FASTP log if it is there + fastp_log = [x for x in hic_folder_path.glob("*.log")] + + if fastp_log != []: + fastp_log = fastp_log[0] + fastp_log = colorize_fastp_log(fastp_log) + else: + fastp_log = None + data["HIC"].append( { "hap": tag, @@ -57,6 +110,7 @@ def parse_hic_folder(folder_name="hic_outputs"): showindex=False, ), "hicqc_report_pdf": os.path.basename(str(hicqc_report)), + "fastp_log": fastp_log, } ) diff --git a/bin/report_modules/templates/hic/hic.html b/bin/report_modules/templates/hic/hic.html index 868dc089..2af561e7 100644 --- a/bin/report_modules/templates/hic/hic.html +++ b/bin/report_modules/templates/hic/hic.html @@ -1,18 +1,45 @@ diff --git a/bin/report_modules/templates/hic/report_contents.html b/bin/report_modules/templates/hic/report_contents.html index b35fa5dd..c7f19be8 100644 --- a/bin/report_modules/templates/hic/report_contents.html +++ b/bin/report_modules/templates/hic/report_contents.html @@ -8,8 +8,6 @@
- -

Sequence labels and lengths

@@ -22,6 +20,14 @@
+ {% if all_stats_dicts['HIC'][item]['fastp_log'] is not none %} +
+

fastp log

+
+
+ {{ all_stats_dicts['HIC'][item]['fastp_log'] }} +
+ {% endif %}
{% if vars.update({'is_first': False}) %} {% endif %} {% endfor %} diff --git a/docs/images/fastp.png b/docs/images/fastp.png new file mode 100644 index 00000000..b968889a Binary files /dev/null and b/docs/images/fastp.png differ diff --git a/docs/output.md b/docs/output.md index f124df2d..dd5dd998 100644 --- a/docs/output.md +++ b/docs/output.md @@ -199,8 +199,9 @@ Kraken2 [assigns taxonomic labels](https://ccb.jhu.edu/software/kraken2/) to seq Hi-C contact mapping experiments measure the frequency of physical contact between loci in the genome. The resulting dataset, called a “contact map,” is represented using a [two-dimensional heatmap](https://github.com/igvteam/juicebox.js) where the intensity of each pixel indicates the frequency of contact between a pair of loci.
-AssemblyQC - HiC QC report -AssemblyQC - HiC interactive contact map +AssemblyQC - fastp log for HiC reads +AssemblyQC - HiC QC report +AssemblyQC - HiC interactive contact map
AssemblyQC - HiC results
diff --git a/subworkflows/local/fq2hic.nf b/subworkflows/local/fq2hic.nf index 7b00cae2..22eed2ef 100644 --- a/subworkflows/local/fq2hic.nf +++ b/subworkflows/local/fq2hic.nf @@ -34,6 +34,7 @@ workflow FQ2HIC { 1 // min_trimmed_reads ) + ch_fastp_log = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_log ch_trim_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) @@ -64,7 +65,7 @@ workflow FQ2HIC { HICQC ( ch_bam_and_ref.map { meta3, bam, fa -> [ meta3, bam ] } ) - ch_hicqc_pdf = HICQC.out.pdf + ch_hicqc_pdf = HICQC.out.pdf ch_versions = ch_versions.mix(HICQC.out.versions) // MODULE: MAKEAGPFROMFASTA | AGP2ASSEMBLY | ASSEMBLY2BEDPE @@ -96,7 +97,8 @@ workflow FQ2HIC { ch_versions = ch_versions.mix(HIC2HTML.out.versions.first()) emit: - hicqc_pdf = ch_hicqc_pdf + fastp_log = ch_fastp_log + hicqc_pdf = ch_hicqc_pdf hic = ch_hic html = HIC2HTML.out.html assembly = AGP2ASSEMBLY.out.assembly diff --git a/workflows/assemblyqc.nf b/workflows/assemblyqc.nf index 103e1618..9116a5ea 100644 --- a/workflows/assemblyqc.nf +++ b/workflows/assemblyqc.nf @@ -590,6 +590,7 @@ workflow ASSEMBLYQC { params.hic_skip_fastqc ) + ch_hic_fastp_log = FQ2HIC.out.fastp_log ch_hicqc_pdf = FQ2HIC.out.hicqc_pdf ch_hic_html = FQ2HIC.out.html ch_hic_assembly = FQ2HIC.out.assembly @@ -600,6 +601,9 @@ workflow ASSEMBLYQC { | mix( ch_hicqc_pdf.map { meta, pdf -> pdf } ) + | mix( + ch_hic_fastp_log.map { meta, log -> log } + ) ch_versions = ch_versions.mix(FQ2HIC.out.versions) // SUBWORKFLOW: FASTA_SYNTENY