From 18a415964177634bb5b951fd0082fb07bcffd1c8 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Fri, 22 Dec 2023 17:12:06 -0500 Subject: [PATCH] feat: support specifying circular contigs --- fasta_checksum_utils/entry.py | 3 ++- fasta_checksum_utils/fasta.py | 9 +++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fasta_checksum_utils/entry.py b/fasta_checksum_utils/entry.py index f68596b..28f02ab 100644 --- a/fasta_checksum_utils/entry.py +++ b/fasta_checksum_utils/entry.py @@ -17,13 +17,14 @@ async def main(): parser.add_argument("fasta", type=str, help="A FASTA path or URI to checksum.") parser.add_argument("--fai", type=str, help="A FASTA FAI index path or URI, if available.") parser.add_argument("--genome-id", type=str, help="Genome ID to include, if --out-format is set to bento-json.") + parser.add_argument("--circular-contigs", type=str, nargs="*", help="Names of circular contigs in this genome.") parser.add_argument( "--out-format", type=str, default="text", choices=("text", "bento-json"), help="Output format for checksum report; either 'text' or 'bento-json' (default: 'text').") args = parser.parse_args() - report = await fasta_report(args.fasta, args.fai, (AlgorithmMD5, AlgorithmGA4GH)) + report = await fasta_report(args.fasta, args.fai, frozenset(args.circular_contigs or ()), (AlgorithmMD5, AlgorithmGA4GH)) if args.out_format == "bento-json": print(report.as_bento_json(genome_id=getattr(args, "genome_id", None))) else: diff --git a/fasta_checksum_utils/fasta.py b/fasta_checksum_utils/fasta.py index 2e85e4b..ba5d6c9 100644 --- a/fasta_checksum_utils/fasta.py +++ b/fasta_checksum_utils/fasta.py @@ -26,12 +26,14 @@ def __init__( file_checksums: dict[ChecksumAlgorithm, str], file_size: int, sequence_checksums_and_lengths: dict[str, tuple[dict[ChecksumAlgorithm, str], int]], + circular_contigs: frozenset[str], ): self._fasta_path_or_uri: str = fasta_path_or_uri self._fai_path_or_uri: Union[str, None] = fai_path_or_uri self._file_checksums = 
file_checksums self._file_size: int = file_size self._sequence_checksums_and_lengths = sequence_checksums_and_lengths + self._circular_contigs: frozenset[str] = circular_contigs @property def fasta_path_or_uri(self) -> str: @@ -54,8 +56,10 @@ def _checksum_dict(cs: dict[ChecksumAlgorithm, str]) -> dict[str, str]: "contigs": [ { "name": contig, + "aliases": [], **_checksum_dict(checksums), - "length": length + "length": length, + "circular": contig in self._circular_contigs, } for contig, (checksums, length) in self._sequence_checksums_and_lengths.items() ] @@ -103,6 +107,7 @@ def _is_http_url(x: str) -> bool: async def fasta_report( fasta_path_or_uri: Union[Path, str], fai_path_or_uri: Union[Path, str, None], + circular_contigs: frozenset[str], algorithms: tuple[ChecksumAlgorithm, ...], ) -> FastaReport: tmp_file_fa = None @@ -181,4 +186,4 @@ async def fasta_report( os.unlink(tmp_file_fai.name) # Generate and return a final report - return FastaReport(fasta_str, fai_str, file_checksums, file_size, sequence_checksums_and_lengths) + return FastaReport(fasta_str, fai_str, file_checksums, file_size, sequence_checksums_and_lengths, circular_contigs)