Skip to content

Commit

Permalink
Hotfix in init. Re-releasing as 0.8.5
Browse files (browse the repository at this point in the history)
  • Loading branch information
MatthewRalston committed Jul 17, 2024
1 parent 09656d4 commit 668c5e0
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 19 deletions.
45 changes: 29 additions & 16 deletions kmerdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1822,21 +1822,18 @@ def profile(arguments):
else:
logger.log_it("Error while processing samplesheet '{0}'...".format(samplesheet), "ERROR")
raise ValueError("Couldn't open sample file '{0}' for reading".format(sample))
arguments.input = samples
else:

raise ValueError("Could not determine file type of input")
elif all(type(s) is str for s in arguments.input):
for sample in arguments.input:
if os.access(sample, os.R_OK):
samples.append(sample)
else:
raise ValueError("Couldn't open sample file '{0}' for reading".format(sample))
else:
raise ValueError("Could not determine POSIX access mode for one or more input files.")


arguments.input = samples

new_args = copy.deepcopy(arguments)



if arguments.k is None:
if arguments.minK is None or arguments.maxK is None:
Expand Down Expand Up @@ -1909,7 +1906,6 @@ def _profile(arguments):
'Parseable' such that the function may be used with 'multiprocessing.Pool'
"""




if arguments.parallel > 1:
Expand All @@ -1918,10 +1914,13 @@ def _profile(arguments):
else:
data = list(map(infile.parsefile, arguments.input))





# the actual 'data' is now a list of 4-tuples
# Each 4-tuple represents a single file
# (edges, header_dictionary<dict>, nullomers<list>, all_kmer_metadata<list>)
# (counts, header_dictionary<dict>, nullomers<list>, list)

# Construct a final_counts array for the composite profile across all inputs

Expand All @@ -1937,11 +1936,18 @@ def _profile(arguments):
logger.log_it("Summing counts from individual fasta/fastq files into a composite profile...", "INFO")


print("Accumulating...")
for d in data:

counts = counts + d[0]


if np.sum(counts) == 0:
raise ValueError("Each element of the array of k-mer counts was 0. Sum of counts == 0. Likely an internal error. Exiting.")



assert np.sum(counts) > 0, "cowardly refusing to print Array of all 0's. More likely an internal error."



sys.stderr.write("\n\n\tCompleted summation and metadata aggregation across all inputs...\n\n")
# unique_kmers = int(np.count_nonzero(counts))
Expand Down Expand Up @@ -1987,12 +1993,17 @@ def _profile(arguments):
unique_nullomers = theoretical_kmers_number - unique_kmers
#unique_nullomers = len(set(nullomer_ids))

print("Theoretical k-mer number: {0} | {1}".format(N, theoretical_kmers_number))
print("Length of count array: {0}".format(counts.size))
print("Number of non-zeroes: {0}".format(unique_kmers))
print("Number of nullomers: {0}".format(unique_nullomers))
logger.log_it("Theoretical k-mer number: {0} | {1}".format(N, theoretical_kmers_number), "DEBUG")
logger.log_it("Length of count array: {0}".format(counts.size), "DEBUG")
logger.log_it("Number of non-zeroes: {0}".format(unique_kmers), "DEBUG")
logger.log_it("Number of nullomers: {0}".format(unique_nullomers), "DEBUG")

# Key assertion

"""
7/15/24 Okay so the assertions are working fine, but something else is getting garbled and the counts are coming out all zero.
"""

assert unique_kmers + unique_nullomers == theoretical_kmers_number, "kmerdb | internal error: unique nullomers ({0}) + unique kmers ({1}) should equal 4^k = {2} (was {3})".format(unique_nullomers, unique_kmers, theoretical_kmers_number, unique_kmers + unique_nullomers)
#logger.info("created a k-mer composite in memory")

Expand Down Expand Up @@ -2048,6 +2059,8 @@ def _profile(arguments):
profile = np.array(range(N), dtype=metadata["profile_dtype"])
counts = np.array(counts, dtype=metadata["count_dtype"])
frequencies = np.divide(counts, metadata["total_kmers"])



logger.log_it("Initialization of profile completed, using approximately {0} bytes for profile".format(counts.nbytes), "INFO")

Expand Down Expand Up @@ -2077,7 +2090,7 @@ def _profile(arguments):
#frequencies = np.zeros(total_kmers, dtype=metadata["frequencies_dtype"])
if arguments.sorted:

kmer_ids_sorted_by_count = np.lexsort(duple_of_arrays)
kmer_ids_sorted_by_count = np.lexsort(kmer_ids)
reverse_kmer_ids_sorted_by_count = list(kmer_ids_sorted_by_count)
reverse_kmer_ids_sorted_by_count.reverse()

Expand Down
2 changes: 1 addition & 1 deletion kmerdb/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@



VERSION="0.8.4"
VERSION="0.8.5"
REQUIRES_PYTHON="3.7.4"
header_delimiter = "\n" + ("="*24) + "\n"

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "kmerdb"
version = "0.8.4"
version = "0.8.5"
description = "Yet another correction to the 'yet another correction to just a k-mer counter...'"
readme = "README.md"
authors = [{name="Matt Ralston <[email protected]>", email="[email protected]"}]
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def can_import(module_name):
AUTHOR = 'Matt Ralston'
#REQUIRES_PYTHON = ">=3.7.4"
REQUIRES_PYTHON = '>=3.12.2'
VERSION = "0.8.4"
VERSION = "0.8.5"
KEYWORDS = ["bioinformatics", "fastq", "fasta", "k-mer", "kmer"]
CLASSIFIERS = [
"Development Status :: 1 - Planning",
Expand Down

0 comments on commit 668c5e0

Please sign in to comment.