Skip to content

Commit

Permalink
Merge pull request #97 from monarch-initiative/develop
Browse files: browse the repository at this point in the history
General QoL updates
  • Loading branch information
glass-ships authored Nov 4, 2022
2 parents a887e24 + e624438 commit 8742158
Show file tree
Hide file tree
Showing 36 changed files with 4,821 additions and 3,176 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ['3.8', '3.9', '3.10']
python-version: ['3.8', '3.9', '3.10', '3.11']
env:
PYTHON: ${{ matrix.python-version }}
OS: ubuntu
Expand All @@ -32,4 +32,6 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: make test
run: make test
run: |
pip install poetry
make test
15 changes: 10 additions & 5 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,19 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: "3.9"
python-version: "3.10"

- name: Install dependencies
run: |
make
pip install poetry && poetry install
- name: Build
run: |
poetry build
- name: Publish to PyPi
env:
FLIT_USERNAME: ${{ secrets.PYPI_USERNAME }}
FLIT_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
run: |
make publish
poetry config http-basic.pypi "__token__" "${PYPI_API_TOKEN}"
poetry publish
29 changes: 29 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
BSD 3-Clause License

Copyright (c) 2022, Monarch Initiative
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 changes: 14 additions & 21 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,42 +4,35 @@ MAKEFLAGS += --warn-undefined-variables
MAKEFLAGS += --no-builtin-rules
MAKEFLAGS += --no-builtin-variables

ifneq (,$(wildcard ./.env))
include .env
export
endif

.DEFAULT_GOAL := all
SHELL := bash

.PHONY: all
all: install-flit install-koza install-dev test

.PHONY: install-flit
install-flit:
pip install flit

.PHONY: install-koza
install-koza: install-flit
flit install --deps production --symlink
all: install test clean

.PHONY: install-dev
install-dev: install-flit
flit install --deps develop --symlink

.PHONY: test
test: install-flit install-dev
python -m pytest
.PHONY: install
install:
poetry install

.PHONY: build
build:
flit build
poetry build

.PHONY: publish
publish:
flit publish
.PHONY: test
test: install
poetry run python -m pytest

.PHONY: clean
clean:
rm -rf `find . -name __pycache__`
rm -f `find . -type f -name '*.py[co]' `
rm -rf .pytest_cache
rm -rf test-output
rm -rf output test-output
rm -rf dist

.PHONY: lint
Expand Down
2 changes: 1 addition & 1 deletion docs/Usage/configuring_ingests.md
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ This Python script is where you'll define the specific steps of your data transf

```python
import uuid
from biolink_model_pydantic.model import Gene, PairwiseGeneToGeneInteraction
from biolink.pydanticmodel import Gene, PairwiseGeneToGeneInteraction
# Get the KozaApp for your ingest
from koza.cli_runner import get_koza_app
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re
import uuid

from biolink_model_pydantic.model import PairwiseGeneToGeneInteraction, Predicate, Protein
from biolink.pydanticmodel import PairwiseGeneToGeneInteraction, Protein

from koza.cli_runner import get_koza_app

Expand All @@ -16,8 +16,7 @@
id="uuid:" + str(uuid.uuid1()),
subject=protein_a.id,
object=protein_b.id,
predicate=Predicate.interacts_with,
relation=koza_app.translation_table.global_table['interacts with'],
predicate="biolink:interacts_with",
)

koza_app.write(protein_a, protein_b, pairwise_gene_to_gene_interaction)
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import uuid

from biolink_model_pydantic.model import Gene, PairwiseGeneToGeneInteraction#, Predicate
from biolink.pydanticmodel import Gene, PairwiseGeneToGeneInteraction

from koza.cli_runner import get_koza_app

Expand All @@ -16,8 +16,7 @@
id="uuid:" + str(uuid.uuid1()),
subject=gene_a.id,
object=gene_b.id,
predicate="biolink:interacts_with"#Predicate.interacts_with,
#relation=koza_app.translation_table.global_table['interacts with'],
predicate="biolink:interacts_with"
)

koza_app.write(gene_a, gene_b, pairwise_gene_to_gene_interaction)
2 changes: 1 addition & 1 deletion examples/string-w-map/map-protein-links-detailed.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import uuid

from biolink_model_pydantic.model import Gene, PairwiseGeneToGeneInteraction
from biolink.pydanticmodel import Gene, PairwiseGeneToGeneInteraction

from koza.cli_runner import get_koza_app

Expand Down
5 changes: 2 additions & 3 deletions examples/string/protein-links-detailed.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re
import uuid

from biolink_model_pydantic.model import PairwiseGeneToGeneInteraction, Predicate, Protein
from biolink.pydanticmodel import PairwiseGeneToGeneInteraction, Protein

from koza.cli_runner import get_koza_app

Expand All @@ -16,8 +16,7 @@
id="uuid:" + str(uuid.uuid1()),
subject=protein_a.id,
object=protein_b.id,
predicate=Predicate.interacts_with,
relation=koza_app.translation_table.global_table['interacts with'],
predicate="biolink:interacts_with",
)

koza_app.write(protein_a, protein_b, pairwise_gene_to_gene_interaction)
2 changes: 0 additions & 2 deletions koza/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@
"""Koza, an ETL framework for LinkML data models"""
__version__ = '0.2.1'
7 changes: 4 additions & 3 deletions koza/app.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import importlib
import logging
import sys
from pathlib import Path
from typing import Dict, Union
Expand All @@ -11,7 +10,7 @@
from linkml_validator.validator import Validator
from koza.converter.kgx_converter import KGXConverter

from koza.exceptions import MapItemException, NextRowException
from koza.utils.exceptions import MapItemException, NextRowException
from koza.io.writer.jsonl_writer import JSONLWriter
from koza.io.writer.tsv_writer import TSVWriter
from koza.io.writer.writer import KozaWriter
Expand All @@ -22,6 +21,7 @@
from koza.model.source import Source
from koza.model.translation_table import TranslationTable

import logging
logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -97,6 +97,7 @@ def process_sources(self):
is_first = True
transform_module = None

logger.info(f"Transforming source: {self.source.config.name}")
if self.source.config.transform_mode == 'flat':
while True:
try:
Expand All @@ -106,7 +107,7 @@ def process_sources(self):
else:
importlib.reload(transform_module)
except MapItemException as mie:
logger.warning(f"{str(mie)} not found in map")
logger.debug(f"{str(mie)} not found in map")
except NextRowException:
continue
except ValidationError as ve:
Expand Down
21 changes: 15 additions & 6 deletions koza/cli_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Module for managing koza runs through the CLI
"""

import logging
from pathlib import Path
from typing import Dict, Optional, Union

Expand All @@ -17,12 +16,15 @@
from koza.model.config.source_config import FormatType, OutputFormat, PrimaryFileConfig
from koza.model.source import Source
from koza.model.translation_table import TranslationTable
from koza.utils.log_utils import set_log_config

import logging
logger = logging.getLogger(__name__)

global koza_apps
koza_apps = {}


def get_koza_app(source_name) -> Optional[KozaApp]:
"""
Getter for singleton koza app object
Expand All @@ -32,6 +34,7 @@ def get_koza_app(source_name) -> Optional[KozaApp]:
except:
raise KeyError(f"{source_name} was not found in KozaApp dictionary")


def set_koza_app(
source: Source,
translation_table: TranslationTable = None,
Expand All @@ -43,9 +46,10 @@ def set_koza_app(
Setter for singleton koza app object
"""
koza_apps[source.config.name] = KozaApp(source, translation_table, output_dir, output_format, schema)
print(f"koza_apps entry created for: {source.config.name}\nkoza_app: {koza_apps[source.config.name]}")
logger.debug(f"koza_apps entry created for {source.config.name}: {koza_apps[source.config.name]}")
return koza_apps[source.config.name]


def transform_source(
source: str,
output_dir: str,
Expand All @@ -54,8 +58,10 @@ def transform_source(
local_table: str = None,
schema: str = None,
row_limit: int = None,
verbose: bool = None,
):

set_log_config(logging.INFO if (verbose is None) else logging.DEBUG if (verbose == True) else logging.WARNING)

with open(source, 'r') as source_fh:
source_config = PrimaryFileConfig(**yaml.load(source_fh, Loader=UniqueIncludeLoader))
if not source_config.name:
Expand All @@ -76,6 +82,7 @@ def transform_source(
source_koza.process_maps()
source_koza.process_sources()


def validate_file(
file: str,
format: FormatType = FormatType.csv,
Expand Down Expand Up @@ -109,6 +116,7 @@ def validate_file(
for _ in reader:
pass


def get_translation_table(
global_table: Union[str, Dict] = None, local_table: Union[str, Dict] = None
) -> TranslationTable:
Expand All @@ -126,7 +134,7 @@ def get_translation_table(
if local_table:
raise ValueError("Local table without a global table not allowed")
else:
logger.info("No global table used for transform")
logger.debug("No global table used for transform")
else:

if isinstance(global_table, str):
Expand All @@ -143,11 +151,12 @@ def get_translation_table(
local_tt = local_table

else:
logger.info("No local table used for transform")
logger.debug("No local table used for transform")

return TranslationTable(global_tt, local_tt)


def test_koza(koza: KozaApp):
"""Manually sets KozaApp (for testing)"""
global koza_app
koza_app = koza
koza_app = koza
2 changes: 1 addition & 1 deletion koza/converter/biolink_converter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from biolink_model_pydantic.model import Gene
from biolink.pydanticmodel import Gene

from koza.cli_runner import koza_app

Expand Down
3 changes: 2 additions & 1 deletion koza/converter/kgx_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ def convert(self, entities: Iterable) -> Tuple[dict, dict]:
# otherwise, not a
else:
raise ValueError(
"Can only convert NamedThing or Association entities to KGX compatible dictionaries"
f"Cannot convert {entity}: Can only convert NamedThing or Association entities to KGX compatible dictionaries"

)

return nodes, edges
Expand Down
10 changes: 6 additions & 4 deletions koza/io/reader/csv_reader.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import logging
from csv import reader
from typing import IO, Any, Dict, Iterator, List, Union

from koza.model.config.source_config import FieldType, HeaderMode
from koza.utils.log_utils import get_logger

LOG = logging.getLogger(__name__)
LOG = get_logger(__name__)
# import logging
# LOG = logging.getLogger(__name__)


FIELDTYPE_CLASS = {
Expand Down Expand Up @@ -115,7 +117,7 @@ def __next__(self) -> Dict[str, Any]:
row = next(self.reader)
self.line_count += 1
except StopIteration:
LOG.info(f"Finished processing {self.line_num} rows for {self.name}")
LOG.info(f"Finished processing {self.line_num} rows for {self.name} from file {self.io_str.name}")
raise StopIteration
self.line_num = self.reader.line_num

Expand Down Expand Up @@ -187,7 +189,7 @@ def _set_header(self):

elif self.header == 'infer':
self._header = self._parse_header_line(skip_blank_or_commented_lines=True)
LOG.info(f"headers for {self.name} parsed as {self._header}")
LOG.debug(f"headers for {self.name} parsed as {self._header}")
if self.field_type_map:
self._compare_headers_to_supplied_columns()
else:
Expand Down
Loading

0 comments on commit 8742158

Please sign in to comment.