From af9a257d88d488b8f1b35c809e8e57f31aab8a7a Mon Sep 17 00:00:00 2001 From: Usman Rashid Date: Tue, 15 Oct 2024 11:37:53 +1300 Subject: [PATCH] Added a sequence labels table below the HiC contact map --- CHANGELOG.md | 5 ++-- bin/report_modules/parsers/hic_parser.py | 18 +++++++++++++++ .../templates/hic/report_contents.html | 23 ++++++++++--------- subworkflows/local/fq2hic.nf | 1 + workflows/assemblyqc.nf | 7 +++++- 5 files changed, 40 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2308569..7a652177 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.2.0dev - [10-Oct-2024] +## v2.2.0dev - [15-Oct-2024] ### `Added` @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 3. Updated `samtools faidx` to 1.21 4. Now using nf-test for pipeline level testing [#153](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/153) 5. Added `text/html` as content mime type for the report file [#146](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/146) +6. Added a sequence labels table below the HiC contact map [#147](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/147) ### `Fixed` @@ -19,7 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Dependencies` -1. Nextflow!>=23.04.0 +1. Nextflow!>=24.04.2 2. nf-schema@2.1.1 ### `Deprecated` diff --git a/bin/report_modules/parsers/hic_parser.py b/bin/report_modules/parsers/hic_parser.py index 825852ec..01992ecd 100644 --- a/bin/report_modules/parsers/hic_parser.py +++ b/bin/report_modules/parsers/hic_parser.py @@ -1,5 +1,7 @@ import os from pathlib import Path +import pandas as pd +from tabulate import tabulate import re from report_modules.parsers.parsing_commons import sort_list_of_results @@ -24,10 +26,26 @@ def parse_hic_folder(folder_name="hic_outputs"): hic_file_name, )[0] + labels_table = pd.read_csv(f"{folder_name}/{file_tokens}.agp.assembly", sep=" ") + + labels_table = labels_table[labels_table.iloc[:, 0].str.startswith(">")].iloc[ + :, [0, 2] + ] + labels_table.columns = ["Sequence", "Length"] + labels_table.Length = labels_table.Length.astype(int) + data["HIC"].append( { "hap": file_tokens, "hic_html_file_name": hic_file_name, + "labels_table": labels_table.to_dict("records"), + "labels_table_html": tabulate( + labels_table, + headers=["Sequence", "Length"], + tablefmt="html", + numalign="left", + showindex=False, + ), } ) diff --git a/bin/report_modules/templates/hic/report_contents.html b/bin/report_modules/templates/hic/report_contents.html index 312a0fdd..4a7f3089 100644 --- a/bin/report_modules/templates/hic/report_contents.html +++ b/bin/report_modules/templates/hic/report_contents.html @@ -1,17 +1,18 @@ {% set vars = {'is_first': True} %} {% for item in range(all_stats_dicts["HIC"]|length) %} {% set active_text = 'display: block' if vars.is_first else 'display: none' %}
-
-
-
{{ all_stats_dicts['HIC'][item]['hap'] }}
-
-
- -
+
+
+
{{ all_stats_dicts['HIC'][item]['hap'] }}
+
+ +
+
+
+
+
{{ all_stats_dicts['HIC'][item]['labels_table_html'] }}
+
+
{% if vars.update({'is_first': False}) %} {% endif %} {% endfor %} diff --git a/subworkflows/local/fq2hic.nf b/subworkflows/local/fq2hic.nf index 34a7fa4e..a37a325a 100644 --- a/subworkflows/local/fq2hic.nf +++ b/subworkflows/local/fq2hic.nf @@ -97,5 +97,6 @@ workflow FQ2HIC { emit: hic = ch_hic html = HIC2HTML.out.html + assembly = AGP2ASSEMBLY.out.assembly versions = ch_versions } diff --git a/workflows/assemblyqc.nf b/workflows/assemblyqc.nf index 4dd9067b..c892263c 100644 --- a/workflows/assemblyqc.nf +++ b/workflows/assemblyqc.nf @@ -591,6 +591,11 @@ workflow ASSEMBLYQC { ) ch_hic_html = FQ2HIC.out.html + ch_hic_assembly = FQ2HIC.out.assembly + ch_hic_report_files = ch_hic_html + | mix( + ch_hic_assembly.map { tag, assembly -> assembly } + ) ch_versions = ch_versions.mix(FQ2HIC.out.versions) // SUBWORKFLOW: FASTA_SYNTENY @@ -822,7 +827,7 @@ workflow ASSEMBLYQC { ch_tidk_outputs .collect().ifEmpty([]), ch_lai_outputs .collect().ifEmpty([]), ch_kraken2_plot .collect().ifEmpty([]), - ch_hic_html .collect().ifEmpty([]), + ch_hic_report_files .collect().ifEmpty([]), ch_synteny_outputs .collect().ifEmpty([]), ch_merqury_outputs .collect().ifEmpty([]), ch_versions_yml,