Skip to content

Commit

Permalink
Merge pull request #90 from monarch-initiative/nested-props
Browse files Browse the repository at this point in the history
Nested props
  • Loading branch information
glass-ships authored Jun 4, 2022
2 parents 7412cc1 + 140f805 commit f1f32e6
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 38 deletions.
2 changes: 1 addition & 1 deletion koza/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""Koza, an ETL framework for LinkML data models"""
__version__ = '0.1.13'
__version__ = '0.1.14'
22 changes: 12 additions & 10 deletions koza/io/reader/json_reader.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import json
import json, yaml
import logging
from typing import IO, Any, Dict, Iterator, List, Union
#from xmlrpc.client import Boolean

import yaml
from koza.io.utils import check_data

LOG = logging.getLogger(__name__)


class JSONReader:
"""
A JSON reader that optionally iterates over a json list
Expand Down Expand Up @@ -34,6 +34,8 @@ def __init__(
self.json_path = json_path
self.name = name



if self.json_path:
if is_yaml:
self.json_obj = yaml.safe_load(self.io_str)
Expand Down Expand Up @@ -73,18 +75,18 @@ def __next__(self) -> Dict[str, Any]:

self._line_num += 1

# Check that required properties exist in row
if self.required_properties:
if not set(next_obj.keys()) >= set(self.required_properties):
# TODO - have koza runner handle this exception
# based on some configuration? similar to
# on_map_error
properties = []
for prop in self.required_properties:
new_prop = check_data(next_obj, prop)
properties.append(new_prop)

if False in properties:
raise ValueError(
f"Required properties defined for {self.name} are missing from {self.io_str.name}\n"
f"Missing properties: {set(self.required_properties) - set(next_obj.keys())}\n"
f"Row: {next_obj}"
)

# If we want to subset
# next_obj = {key: next_obj[key] for key in next_obj.keys() if key in self.required_properties}

return next_obj
19 changes: 11 additions & 8 deletions koza/io/reader/jsonl_reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import logging
from typing import IO, Any, Dict, Iterator, List
from koza.io.utils import check_data

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -45,16 +46,18 @@ def __next__(self) -> Dict[str, Any]:

json_obj = json.loads(next_line)

# Check that required properties exist in row
if self.required_properties:
if not set(json_obj.keys()) >= set(self.required_properties):
# TODO - have koza runner handle this exception
# based on some configuration? similar to
# on_map_error
properties = []
for prop in self.required_properties:
new_prop = check_data(json_obj, prop)
properties.append(new_prop)

if False in properties:
raise ValueError(
f"Configured properties missing in source file "
f"{set(self.required_properties) - set(json_obj.keys())}"
f"Required properties defined for {self.name} are missing from {self.io_str.name}\n"
f"Missing properties: {set(self.required_properties) - set(json_obj.keys())}\n"
f"Row: {json_obj}"
)
# If we want to turn this into a subsetter
# json_obj = {key: json_obj[key] for key in json_obj.keys() if key in self.required_properties}

return json_obj
23 changes: 23 additions & 0 deletions koza/io/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import requests

##### Helper Functions for Reader classes #####

def open_resource(resource: Union[str, PathLike]) -> IO[str]:
"""
Expand Down Expand Up @@ -59,6 +60,28 @@ def open_resource(resource: Union[str, PathLike]) -> IO[str]:
else:
raise ValueError(f"Cannot open local or remote file: {resource}")

def check_data(entry, path) -> bool:
    """
    Given a dot delimited JSON tag path,
    report whether that path exists in the entry.

    Each tag in the path is looked up inside the value found for the
    previous tag, so "evidence.publicationId" is only present when
    entry["evidence"] is a dict that contains "publicationId".

    :param entry: parsed JSON/YAML object (nested dicts) to inspect
    :param path: dot delimited path of keys, e.g. "evidence.publicationId"
    :return: True if every tag along the path is present, False otherwise
    """
    for tag in path.split("."):
        # Stop at the first missing tag: continuing the walk would test the
        # remaining tags against the wrong nesting level and could report a
        # nested property as present when only a same-named sibling exists.
        # Non-dict values (scalars, lists, None) cannot hold a named child,
        # so they count as "missing" instead of raising or substring-matching.
        if not isinstance(entry, dict) or tag not in entry:
            return False
        entry = entry[tag]
    return True

##### Helper functions for Writer classes #####

Expand Down
Binary file not shown.
16 changes: 8 additions & 8 deletions tests/unit/test_jsonlreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,29 @@

from koza.io.reader.jsonl_reader import JSONLReader

test_zfin = (
Path(__file__).parent.parent / 'resources' / 'source-files' / 'ZFIN_PHENOTYPE_0.jsonl.gz'
)

test_zfin_data = Path(__file__).parents[1] / 'resources' / 'source-files' / 'ZFIN_PHENOTYPE_0.jsonl.gz'

def test_normal_case():
with gzip.open(test_zfin, 'rt') as zfin:
with gzip.open(test_zfin_data, 'rt') as zfin:
jsonl_reader = JSONLReader(zfin)
row = next(jsonl_reader)
assert len(row) == 6

assert row['objectId'] == 'ZFIN:ZDB-GENE-011026-1'


def test_required_property():
with gzip.open(test_zfin, 'rt') as zfin:
jsonl_reader = JSONLReader(zfin, ['objectId'])
with gzip.open(test_zfin_data, 'rt') as zfin:
jsonl_reader = JSONLReader(zfin, required_properties = ['objectId', 'evidence.publicationId'])
for row in jsonl_reader:
# assert len(row) == 1 # removed subsetter
print(row)
assert 'objectId' in row
assert row['evidence']['publicationId']


def test_missing_req_property_raises_exception():
with gzip.open(test_zfin, 'rt') as zfin:
with gzip.open(test_zfin_data, 'rt') as zfin:
jsonl_reader = JSONLReader(zfin, ['objectId', 'foobar'])
with pytest.raises(ValueError):
next(jsonl_reader)
23 changes: 12 additions & 11 deletions tests/unit/test_jsonreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,28 @@

from koza.io.reader.json_reader import JSONReader

test_ddpheno = Path(__file__).parents[1] / 'resources' / 'source-files' / 'ddpheno.json.gz'
test_zfin_data = Path(__file__).parents[1] / 'resources' / 'source-files' / 'test_BGI_ZFIN.json.gz'

json_path = ['graphs', 0, 'nodes']
json_path = ['data', 0,]


def test_normal_case():
with gzip.open(test_ddpheno, 'rt') as ddpheno:
json_reader = JSONReader(ddpheno, json_path=json_path)
with gzip.open(test_zfin_data, 'rt') as zfin:
json_reader = JSONReader(zfin, json_path=json_path)
row = next(json_reader)
assert row['id'] == 'http://purl.obolibrary.org/obo/DDPHENO_0001198'
assert row['symbol'] == 'gdnfa'


def test_required_properties():
with gzip.open(test_ddpheno, 'rt') as ddpheno:
json_reader = JSONReader(ddpheno, ['id'], json_path=json_path)
with gzip.open(test_zfin_data, 'rt') as zfin:
json_reader = JSONReader(zfin, ['name', 'basicGeneticEntity.primaryId'], json_path=json_path)
for row in json_reader:
assert 'id' in row

print(row)
assert row['name']
assert row['basicGeneticEntity']['primaryId']

def test_missing_req_property_raises_exception():
with gzip.open(test_ddpheno, 'rt') as ddpheno:
json_reader = JSONReader(ddpheno, ['fake_prop'], json_path=json_path)
with gzip.open(test_zfin_data, 'rt') as zfin:
json_reader = JSONReader(zfin, ['fake_prop'], json_path=json_path)
with pytest.raises(ValueError):
next(json_reader)

0 comments on commit f1f32e6

Please sign in to comment.