From 7e0229de746170458998e96abd9180450aac79df Mon Sep 17 00:00:00 2001
From: Kevin Schaper
Date: Wed, 7 Aug 2024 13:35:58 -0700
Subject: [PATCH] Fixed truthiness error that made the tsv writer ignore zero values

---
 src/koza/io/utils.py                       |  6 ++--
 tests/unit/test_io_utils.py                | 38 ++++++++++++++++++++++
 tests/unit/test_tsvwriter_node_and_edge.py | 30 +++++++++++++++--
 3 files changed, 69 insertions(+), 5 deletions(-)

diff --git a/src/koza/io/utils.py b/src/koza/io/utils.py
index b1b4ae3..c1b4455 100644
--- a/src/koza/io/utils.py
+++ b/src/koza/io/utils.py
@@ -144,7 +144,7 @@ def build_export_row(data: Dict, list_delimiter: str = None) -> Dict:
     tidy_data = {}
     for key, value in data.items():
         new_value = remove_null(value)
-        if new_value:
+        if new_value is not None:
             tidy_data[key] = _sanitize_export_property(key, new_value, list_delimiter)
     return tidy_data
 
@@ -221,14 +221,14 @@ def remove_null(input: Any) -> Any:
         new_value = []
         for v in input:
             x = remove_null(v)
-            if x:
+            if x is not None:
                 new_value.append(x)
     elif isinstance(input, dict):
         # value is a dict
         new_value = {}
         for k, v in input.items():
             x = remove_null(v)
-            if x:
+            if x is not None:
                 new_value[k] = x
     elif isinstance(input, str):
         # value is a str
diff --git a/tests/unit/test_io_utils.py b/tests/unit/test_io_utils.py
index c77c479..fe63168 100644
--- a/tests/unit/test_io_utils.py
+++ b/tests/unit/test_io_utils.py
@@ -96,3 +96,41 @@ def test_sanitize_export_property(query):
         assert query[1] == value
     else:
         assert query[1] in value
+
+
+@pytest.mark.parametrize(
+    "input, expected",
+    [
+        ([1, None, 2, "", 3, " "], [1, 2, 3]),
+        ({"a": 1, "b": None, "c": "", "d": 2, "e": " "}, {"a": 1, "d": 2}),
+        ({"a": [1, None, 2], "b": {"c": None, "d": 3}}, {"a": [1, 2], "b": {"d": 3}}),
+        ("test", "test"),
+        ("", None),
+        (None, None),
+        (5, 5),
+        (5.5, 5.5),
+        (True, True),
+        (False, False),
+        (0, 0),  # Ensure zeroes are not turned into None
+        ([0, None, 1], [0, 1]),  # Ensure zeroes in lists are not turned into None
+        ({"a": 0, "b": None}, {"a": 0}),  # Ensure zeroes in dicts are not turned into None
+    ],
+)
+def test_remove_null(input, expected):
+    assert remove_null(input) == expected
+
+
+@pytest.mark.parametrize(
+    "input, expected",
+    [
+        (None, True),
+        ("", True),
+        (" ", True),
+        ("non-empty string", False),
+        (0, False),
+        (False, False),
+        (True, False),
+    ],
+)
+def test_is_null(input, expected):
+    assert is_null(input) == expected
diff --git a/tests/unit/test_tsvwriter_node_and_edge.py b/tests/unit/test_tsvwriter_node_and_edge.py
index b0ac031..9456653 100644
--- a/tests/unit/test_tsvwriter_node_and_edge.py
+++ b/tests/unit/test_tsvwriter_node_and_edge.py
@@ -9,7 +9,7 @@ def test_tsv_writer():
     """
     Writes a test tsv file
     """
-    g = Gene(id="HGNC:11603", name="TBX4")
+    g = Gene(id="HGNC:11603", in_taxon=["NCBITaxon:9606"], symbol="TBX4")
     d = Disease(id="MONDO:0005002", name="chronic obstructive pulmonary disease")
     a = GeneToDiseaseAssociation(
         id="uuid:5b06e86f-d768-4cd9-ac27-abe31e95ab1e",
@@ -18,11 +18,23 @@ def test_tsv_writer():
         predicate="biolink:contributes_to",
         knowledge_level="not_provided",
         agent_type="not_provided",
+        has_count=0,
+        has_total=20,
     )
     ent = [g, d, a]
 
     node_properties = ["id", "category", "symbol", "in_taxon", "provided_by", "source"]
-    edge_properties = ["id", "subject", "predicate", "object", "category" "qualifiers", "publications", "provided_by"]
+    edge_properties = [
+        "id",
+        "subject",
+        "predicate",
+        "object",
+        "category" "qualifiers",
+        "has_count",
+        "has_total",
+        "publications",
+        "provided_by",
+    ]
 
     outdir = "output/tests"
     outfile = "tsvwriter-node-and-edge"
@@ -34,3 +46,17 @@ def test_tsv_writer():
     assert os.path.exists("{}/{}_nodes.tsv".format(outdir, outfile)) and os.path.exists(
         "{}/{}_edges.tsv".format(outdir, outfile)
     )
+
+    # read the node and edges tsv files and confirm the expected values
+    with open("{}/{}_nodes.tsv".format(outdir, outfile), "r") as f:
+        lines = f.readlines()
+        assert lines[1] == "HGNC:11603\tbiolink:Gene\t\tNCBITaxon:9606\t\tTBX4\n"
+        assert len(lines) == 3
+
+    with open("{}/{}_edges.tsv".format(outdir, outfile), "r") as f:
+        lines = f.readlines()
+        assert (
+            lines[1].strip()
+            == "uuid:5b06e86f-d768-4cd9-ac27-abe31e95ab1e\tHGNC:11603\tbiolink:contributes_to\tMONDO:0005002\t\t\t0\t20"
+        )
+        assert len(lines) == 2