Skip to content

Commit

Permalink
Merge pull request #117 from monarch-initiative/header-prefix
Browse files Browse the repository at this point in the history
add header prefix config field
  • Loading branch information
glass-ships authored Dec 1, 2023
2 parents dac20b5 + 4da41e7 commit ba0ba45
Show file tree
Hide file tree
Showing 6 changed files with 20 additions and 8 deletions.
3 changes: 2 additions & 1 deletion docs/Usage/ingests.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,9 @@ Creating this ingest will require three things:
| __Optional CSV Specific Properties__ | |
| `columns` | List of columns to include in output (CSV only) |
| `delimiter` | Delimiter for csv files |
| `header_delimiter` | Delimiter for header in csv files |
| `header` | Header row index for csv files |
| `header_delimiter` | Delimiter for header in csv files |
| `header_prefix` | Prefix to remove from the start of the header row in csv files, if present |
| `comment_char` | Comment character for csv files |
| `skip_blank_lines` | Skip blank lines in csv files |

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "koza"
version = "0.5.1"
version = "0.5.2"
description = "Data transformation framework for LinkML data models"
authors = [
"The Monarch Initiative <[email protected]>",
Expand Down
8 changes: 6 additions & 2 deletions src/koza/cli_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ def transform_source(
logger,
)

koza_app = _set_koza_app(koza_source, translation_table, output_dir, output_format, schema, node_type, edge_type, logger)
koza_app = _set_koza_app(
koza_source, translation_table, output_dir, output_format, schema, node_type, edge_type, logger
)
koza_app.process_maps()
koza_app.process_sources()

Expand Down Expand Up @@ -172,7 +174,9 @@ def _set_koza_app(
) -> KozaApp:
"""Create a KozaApp object for a given source"""

koza_apps[source.config.name] = KozaApp(source, translation_table, output_dir, output_format, schema, node_type, edge_type, logger)
koza_apps[source.config.name] = KozaApp(
source, translation_table, output_dir, output_format, schema, node_type, edge_type, logger
)
logger.debug(f"koza_apps entry created for {source.config.name}: {koza_apps[source.config.name]}")
return koza_apps[source.config.name]

Expand Down
6 changes: 5 additions & 1 deletion src/koza/io/reader/csv_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(
delimiter: str = ",",
header: Union[int, HeaderMode] = HeaderMode.infer,
header_delimiter: str = None,
header_prefix: str = None,
dialect: str = "excel",
skip_blank_lines: bool = True,
name: str = "csv file",
Expand All @@ -69,6 +70,7 @@ def __init__(
if field_type_map is None this will raise a ValueError
:param header_delimiter: delimiter for the header row, default = self.delimiter
:param header_prefix: prefix for the header row, default = None
:param dialect: csv dialect, default=excel
:param skip_blank_lines: true to skip blank lines, false to insert NaN for blank lines,
:param name: filename or alias
Expand All @@ -82,6 +84,7 @@ def __init__(
self.dialect = dialect
self.header = header
self.header_delimiter = header_delimiter if header_delimiter else delimiter
self.header_prefix = header_prefix
self.skip_blank_lines = skip_blank_lines
self.name = name
self.comment_char = comment_char
Expand Down Expand Up @@ -205,11 +208,12 @@ def _parse_header_line(self, skip_blank_or_commented_lines: bool = False) -> Lis
Parse the header line and return a list of headers
"""
fieldnames = next(reader(self.io_str, **{'delimiter': self.header_delimiter, 'dialect': self.dialect}))
if self.header_prefix and fieldnames[0].startswith(self.header_prefix):
fieldnames[0] = fieldnames[0].lstrip(self.header_prefix)
if skip_blank_or_commented_lines:
# there has to be a cleaner way to do this
while not fieldnames or (self.comment_char is not None and fieldnames[0].startswith(self.comment_char)):
fieldnames = next(reader(self.io_str, **{'delimiter': self.header_delimiter, 'dialect': self.dialect}))

fieldnames[0] = fieldnames[0].lstrip(self.comment_char)
return [f.strip() for f in fieldnames]

Expand Down
6 changes: 4 additions & 2 deletions src/koza/model/config/source_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,9 @@ class SourceConfig:
columns: List[str] (optional) - list of columns to include
required_properties: List[str] (optional) - list of properties which must be in json data files
delimiter: str (optional) - delimiter for csv files
header_delimiter: str (optional) - delimiter for header in csv files
header: int (optional) - header row index
header_delimiter: str (optional) - delimiter for header in csv files
header_prefix: str (optional) - prefix for header in csv files
comment_char: str (optional) - comment character for csv files
skip_blank_lines: bool (optional) - skip blank lines in csv files
filters: List[ColumnFilter] (optional) - list of filters to apply
Expand All @@ -171,6 +172,7 @@ class SourceConfig:
delimiter: Optional[str] = None
header: Union[int, HeaderMode] = HeaderMode.infer
header_delimiter: Optional[str] = None
header_prefix: Optional[str] = None
comment_char: str = "#"
skip_blank_lines: bool = True
filters: List[ColumnFilter] = field(default_factory=list)
Expand Down Expand Up @@ -290,7 +292,7 @@ def __post_init__(self):
raise ValueError("Field type map contains more than one key")
for key, val in field.items():
field_type_map[key] = val
print(f"FIELD TYPE MAP: {field_type_map}")
# print(f"FIELD TYPE MAP: {field_type_map}")
self.field_type_map = field_type_map


Expand Down
3 changes: 2 additions & 1 deletion src/koza/model/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ def __init__(self, config: Union[PrimaryFileConfig, MapFileConfig], row_limit: O
name=config.name,
field_type_map=config.field_type_map,
delimiter=config.delimiter,
header_delimiter=config.header_delimiter,
header=config.header,
header_delimiter=config.header_delimiter,
header_prefix=config.header_prefix,
comment_char=self.config.comment_char,
row_limit=self.row_limit,
)
Expand Down

0 comments on commit ba0ba45

Please sign in to comment.