diff --git a/CHANGELOG.md b/CHANGELOG.md index 0be19d33..fb8159fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 6. Added a sequence labels table below the HiC contact map [#147](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/147) 7. Added parameter `hic_samtools_ext_args` and set its default value to `-F 3852` [#159](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/159) 8. Added the HiC QC report to the final report so that users don't have to navigate to the results folder [#162](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/162) +9. Added the fastp log to the final report [#163](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/163) ### `Fixed` diff --git a/bin/report_modules/parsers/hic_parser.py b/bin/report_modules/parsers/hic_parser.py index 17231e17..45d52fbf 100644 --- a/bin/report_modules/parsers/hic_parser.py +++ b/bin/report_modules/parsers/hic_parser.py @@ -7,6 +7,50 @@ from report_modules.parsers.parsing_commons import sort_list_of_results +def colorize_fastp_log(log: Path): + section_colors = { + "adapter": "color: blue;", + "before_filtering": "color: goldenrod;", + "after_filtering": "color: green;", + "filtering_result": "color: green;", + "duplication": "color: red;", + "fastp": "color: gray;", + "version": "color: blue;", + } + + patterns = { + "adapter": re.compile(r"Detecting adapter sequence for read\d..."), + "before_filtering": re.compile(r"Read\d before filtering:"), + "after_filtering": re.compile(r"Read\d after filtering:"), + "filtering_result": re.compile(r"Filtering result:"), + "duplication": re.compile(r"Duplication rate:"), + "fastp": re.compile(r"fastp --in"), + "version": re.compile(r"fastp v"), + } + + html_log = "
\n" + + for line in log.read_text().split("\n"): + colored_line = line.strip() + # Apply HTML color style based on section patterns + for section, pattern in patterns.items(): + if pattern.search(line): + colored_line = ( + f"{line.strip()}" + ) + break + else: + # Default styling for uncolored lines + colored_line = f"{line.strip()}" + + html_log += f"{colored_line}\n" + + # Close HTML tags + html_log += "" + + return html_log + + def parse_hic_folder(folder_name="hic_outputs"): dir = os.getcwdb().decode() hic_folder_path = Path(f"{dir}/{folder_name}") @@ -44,6 +88,15 @@ def parse_hic_folder(folder_name="hic_outputs"): if re.match(rf"[\S]+\.on\.{tag}_qc_report\.pdf", x.name) ][0] + # Get FASTP log if it is there + fastp_log = [x for x in hic_folder_path.glob("*.log")] + + if fastp_log != []: + fastp_log = fastp_log[0] + fastp_log = colorize_fastp_log(fastp_log) + else: + fastp_log = None + data["HIC"].append( { "hap": tag, @@ -57,6 +110,7 @@ def parse_hic_folder(folder_name="hic_outputs"): showindex=False, ), "hicqc_report_pdf": os.path.basename(str(hicqc_report)), + "fastp_log": fastp_log, } ) diff --git a/bin/report_modules/templates/hic/hic.html b/bin/report_modules/templates/hic/hic.html index 868dc089..2af561e7 100644 --- a/bin/report_modules/templates/hic/hic.html +++ b/bin/report_modules/templates/hic/hic.html @@ -1,18 +1,45 @@ diff --git a/bin/report_modules/templates/hic/report_contents.html b/bin/report_modules/templates/hic/report_contents.html index b35fa5dd..c7f19be8 100644 --- a/bin/report_modules/templates/hic/report_contents.html +++ b/bin/report_modules/templates/hic/report_contents.html @@ -8,8 +8,6 @@
Sequence labels and lengths
fastp log
+