Skip to content

Commit

Permalink
feat: support specifying circular contigs
Browse files Browse the repository at this point in the history
  • Loading branch information
davidlougheed committed Dec 22, 2023
1 parent 247c5d2 commit 18a4159
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
3 changes: 2 additions & 1 deletion fasta_checksum_utils/entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ async def main():
parser.add_argument("fasta", type=str, help="A FASTA path or URI to checksum.")
parser.add_argument("--fai", type=str, help="A FASTA FAI index path or URI, if available.")
parser.add_argument("--genome-id", type=str, help="Genome ID to include, if --out-format is set to bento-json.")
# Default to an empty tuple: with nargs="*" an omitted flag would otherwise
# leave args.circular_contigs as None, and frozenset(None) raises TypeError.
parser.add_argument(
    "--circular-contigs", type=str, nargs="*", default=(),
    help="Names of circular contigs in this genome.")
parser.add_argument(
"--out-format", type=str, default="text", choices=("text", "bento-json"),
help="Output format for checksum report; either 'text' or 'bento-json' (default: 'text').")

args = parser.parse_args()

report = await fasta_report(args.fasta, args.fai, (AlgorithmMD5, AlgorithmGA4GH))
report = await fasta_report(args.fasta, args.fai, frozenset(args.circular_contigs), (AlgorithmMD5, AlgorithmGA4GH))
if args.out_format == "bento-json":
print(report.as_bento_json(genome_id=getattr(args, "genome_id", None)))
else:
Expand Down
9 changes: 7 additions & 2 deletions fasta_checksum_utils/fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@ def __init__(
file_checksums: dict[ChecksumAlgorithm, str],
file_size: int,
sequence_checksums_and_lengths: dict[str, tuple[dict[ChecksumAlgorithm, str], int]],
circular_contigs: frozenset[str],
):
self._fasta_path_or_uri: str = fasta_path_or_uri
self._fai_path_or_uri: Union[str, None] = fai_path_or_uri
self._file_checksums = file_checksums
self._file_size: int = file_size
self._sequence_checksums_and_lengths = sequence_checksums_and_lengths
self._circular_contigs: frozenset[str] = circular_contigs

@property
def fasta_path_or_uri(self) -> str:
Expand All @@ -54,8 +56,10 @@ def _checksum_dict(cs: dict[ChecksumAlgorithm, str]) -> dict[str, str]:
"contigs": [
{
"name": contig,
"aliases": [],
**_checksum_dict(checksums),
"length": length
"length": length,
"circular": contig in self._circular_contigs,
}
for contig, (checksums, length) in self._sequence_checksums_and_lengths.items()
]
Expand Down Expand Up @@ -103,6 +107,7 @@ def _is_http_url(x: str) -> bool:
async def fasta_report(
fasta_path_or_uri: Union[Path, str],
fai_path_or_uri: Union[Path, str, None],
circular_contigs: frozenset[str],
algorithms: tuple[ChecksumAlgorithm, ...],
) -> FastaReport:
tmp_file_fa = None
Expand Down Expand Up @@ -181,4 +186,4 @@ async def fasta_report(
os.unlink(tmp_file_fai.name)

# Generate and return a final report
return FastaReport(fasta_str, fai_str, file_checksums, file_size, sequence_checksums_and_lengths)
return FastaReport(fasta_str, fai_str, file_checksums, file_size, sequence_checksums_and_lengths, circular_contigs)

0 comments on commit 18a4159

Please sign in to comment.