-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #73 from monarch-initiative/feature/line_limit
Feature/line limit
- Loading branch information
Showing
13 changed files
with
174 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
koza-env/ | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
"""Koza, an ETL framework for LinkML data models""" | ||
__version__ = '0.1.6' | ||
__version__ = '0.1.7' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
""" | ||
Test the row_limit argument for transforms | ||
Assert correct number of rows has been processed | ||
""" | ||
#TODO: Parameterize row_limit, and test reading from JSON and JSONL | ||
#TODO: Address filter in examples/string-declarative/protein-links-detailed.yaml | ||
|
||
from pathlib import Path | ||
|
||
import pytest | ||
|
||
from koza.cli_runner import transform_source | ||
from koza.model.config.source_config import OutputFormat | ||
|
||
@pytest.mark.parametrize(
    "ingest, output_names, output_format, row_limit, header_len, expected_node_len, expected_edge_len",
    [
        (
            "string-declarative",  # ingest
            ["protein-links-detailed"],  # output_names
            OutputFormat.tsv,  # output_format
            3,  # row_limit
            1,  # header_len
            11,  # expected_node_len
            6,  # expected_edge_len
        )
    ],
)
def test_examples(ingest, output_names, output_format, row_limit, header_len, expected_node_len, expected_edge_len):
    """Run a row-limited transform and check the size of the written files.

    The ``protein-links-detailed`` example of the given ingest is transformed
    with ``row_limit`` applied, then the number of lines in the resulting node
    and edge files is compared against the expected totals.

    ``output_names`` and ``header_len`` are supplied by the parametrization
    but are not read by the test body; the source name and the ``.tsv``
    suffix are hard-coded below.
    """
    source = f"examples/{ingest}/protein-links-detailed.yaml"
    output_suffix = ".tsv"
    output_dir = f"./test-output/{ingest}-row-limit"

    transform_source(
        source=source,
        output_dir=output_dir,
        output_format=output_format,
        global_table="examples/translation_table.yaml",
        local_table=None,
        row_limit=row_limit,
    )

    # hacky check that correct number of rows was processed
    # NOTE(review): the expected totals presumably include the header line
    # of each file -- confirm against the writer.
    node_file = f"{output_dir}/protein-links-detailed_nodes{output_suffix}"
    edge_file = f"{output_dir}/protein-links-detailed_edges{output_suffix}"

    # Count lines inside context managers so the handles are closed
    # deterministically instead of being left to the garbage collector.
    with open(node_file) as node_handle:
        node_lines = sum(1 for _ in node_handle)
    with open(edge_file) as edge_handle:
        edge_lines = sum(1 for _ in edge_handle)

    assert node_lines == expected_node_len
    assert edge_lines == expected_edge_len
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import gzip | ||
from pathlib import Path | ||
|
||
import pytest | ||
|
||
from koza.io.reader.jsonl_reader import JSONLReader | ||
|
||
test_zfin = ( | ||
Path(__file__).parent.parent / 'resources' / 'source-files' / 'ZFIN_PHENOTYPE_0.jsonl.gz' | ||
) | ||
|
||
|
||
def test_normal_case():
    """The first record read from the ZFIN JSONL fixture has the expected shape."""
    with gzip.open(test_zfin, 'rt') as handle:
        reader = JSONLReader(handle, row_limit=3)
        first_record = next(reader)
        # Six entries are expected in the first record, one of them its id.
        assert len(first_record) == 6
        assert first_record['objectId'] == 'ZFIN:ZDB-GENE-011026-1'
|
||
|
||
def test_required_property():
    """Every row yielded under a row limit carries the required property."""
    row_limit = 3
    with gzip.open(test_zfin, 'rt') as handle:
        reader = JSONLReader(handle, ['objectId'], row_limit=row_limit)
        # NOTE(review): the tally starts at 1, so the final assertion holds
        # when the reader yields row_limit - 1 rows -- confirm this matches
        # the reader's intended row_limit semantics.
        tally = 1
        for tally, record in enumerate(reader, start=2):
            assert 'objectId' in record
        assert tally == row_limit
|
||
|
||
def test_missing_req_property_raises_exception():
    """Reading a row raises ValueError when a required property is requested
    alongside 'foobar' (presumably absent from the fixture records)."""
    required = ['objectId', 'foobar']
    with gzip.open(test_zfin, 'rt') as handle:
        reader = JSONLReader(handle, required, row_limit=3)
        # The reader is built outside the raises-block: only fetching a row
        # is expected to fail.
        with pytest.raises(ValueError):
            next(reader)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import gzip | ||
from pathlib import Path | ||
|
||
import pytest | ||
|
||
from koza.io.reader.json_reader import JSONReader | ||
|
||
test_ddpheno = Path(__file__).parents[1] / 'resources' / 'source-files' / 'ddpheno.json.gz' | ||
|
||
json_path = ['graphs', 0, 'nodes'] | ||
|
||
|
||
def test_normal_case():
    """The first node read from the ddpheno JSON fixture has the expected id."""
    expected_id = 'http://purl.obolibrary.org/obo/DDPHENO_0001198'
    with gzip.open(test_ddpheno, 'rt') as handle:
        reader = JSONReader(handle, json_path=json_path, row_limit=3)
        first_node = next(reader)
        assert first_node['id'] == expected_id
|
||
|
||
def test_required_properties():
    """Exactly row_limit rows are yielded and each carries the required 'id'."""
    row_limit = 3
    with gzip.open(test_ddpheno, 'rt') as handle:
        reader = JSONReader(handle, ['id'], json_path=json_path, row_limit=row_limit)
        # The tally stays 0 if the reader yields nothing; otherwise it ends
        # at the number of rows seen.
        tally = 0
        for tally, node in enumerate(reader, start=1):
            assert 'id' in node
        assert tally == row_limit
|
||
|
||
def test_missing_req_property_raises_exception():
    """Reading a row raises ValueError when 'fake_prop' is required."""
    required = ['fake_prop']
    with gzip.open(test_ddpheno, 'rt') as handle:
        reader = JSONReader(handle, required, json_path=json_path, row_limit=3)
        # Construction happens outside the raises-block: only fetching a row
        # is expected to fail.
        with pytest.raises(ValueError):
            next(reader)