Skip to content

Commit

Permalink
Better error message when initializing with invisible columns (IAMconsortium#634)
Browse files Browse the repository at this point in the history

Co-authored-by: Philip Hackstock <[email protected]>
  • Loading branch information
danielhuppmann and phackstock authored Mar 2, 2022
1 parent 38897ff commit 7826d18
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 20 deletions.
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ an empty **IamDataFrame**. Previously, this raised an error.

## Individual updates

- [#634](https://github.com/IAMconsortium/pyam/pull/634) Better error message when initializing with invisible columns
- [#598](https://github.com/IAMconsortium/pyam/pull/598) Support mixed 'year' and 'datetime' domain

# Release v1.3.1
Expand Down
34 changes: 18 additions & 16 deletions pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pandas.api.types import is_integer

from pathlib import Path
from py._path.local import LocalPath
from tempfile import TemporaryDirectory

from pyam.filter import filter_by_time_domain, filter_by_year, filter_by_dt_arg
Expand Down Expand Up @@ -161,27 +162,28 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs):
# cast data from pandas
if isinstance(data, pd.DataFrame) or isinstance(data, pd.Series):
_data = format_data(data.copy(), index=index, **kwargs)

# read data from ixmp Platform instance
elif has_ix and isinstance(data, ixmp.TimeSeries):
# TODO read meta indicators from ixmp
_data = read_ix(data, **kwargs)

# read from file
elif isinstance(data, (str, LocalPath, Path)):
data = Path(data) # casting str or LocalPath to Path
if not data.is_file():
raise FileNotFoundError(f"No such file: '{data}'")
logger.info(f"Reading file {data}")
_data = read_file(data, index=index, **kwargs)

# unsupported `data` args
elif islistable(data):
raise ValueError(
"Initializing from list is not supported, "
"use `IamDataFrame.append()` or `pyam.concat()`"
)
else:
if islistable(data):
raise ValueError(
"Initializing from list is not supported, "
"use `IamDataFrame.append()` or `pyam.concat()`"
)
# read from file
try:
data = Path(data) # casting str or LocalPath to Path
if data.is_file():
logger.info(f"Reading file {data}")
_data = read_file(data, index=index, **kwargs)
else:
raise FileNotFoundError(f"File {data} does not exist")
except TypeError: # `data` cannot be cast to Path
msg = "IamDataFrame constructor not properly called!"
raise ValueError(msg)
raise ValueError("IamDataFrame constructor not properly called!")

self._data, index, self.time_col, self.extra_cols = _data

Expand Down
12 changes: 9 additions & 3 deletions pyam/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,11 @@ def read_pandas(path, sheet_name="data*", *args, **kwargs):
# remove unnamed and empty columns, and rows where all values are nan
def is_empty(name, s):
if str(name).startswith("Unnamed: "):
if len(s) == 0 or all(np.isnan(s)):
return True
try:
if len(s) == 0 or all(np.isnan(s)):
return True
except TypeError:
pass
return False

empty_cols = [c for c in df.columns if is_empty(c, df[c])]
Expand Down Expand Up @@ -351,7 +354,10 @@ def convert_r_columns(c):
# verify that there are no nan's left (in columns)
null_rows = df.isnull().T.any()
if null_rows.any():
raise_data_error("Empty cells in `data`", df.loc[null_rows])
cols = ", ".join(df.columns[df.isnull().any().values])
raise_data_error(
f"Empty cells in `data` (columns: '{cols}')", df.loc[null_rows]
)
del null_rows

# cast to pd.Series, check for duplicates
Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ install_requires =
setuptools_scm
# required explicitly for Python 3.7
importlib_metadata
# required explicitly for type-checking
py
setup_requires =
setuptools >= 41
setuptools_scm
Expand Down
Binary file added tests/data/na_column.xlsx
Binary file not shown.
9 changes: 8 additions & 1 deletion tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_unknown_type():

def test_not_a_file():
# initializing with a file-like that's not a file raises an error
match = "File foo.csv does not exist"
match = "No such file: 'foo.csv'"
with pytest.raises(FileNotFoundError, match=match):
IamDataFrame("foo.csv")

Expand Down Expand Up @@ -111,6 +111,13 @@ def test_init_df_with_na_unit(test_pd_df, tmpdir):
IamDataFrame(file) # reading from file as IamDataFrame works


def test_init_df_with_na_column_raises(test_pd_df, tmpdir):
    """Check that a file with a "corrupted" column raises the expected error.

    Initializing an IamDataFrame from `na_column.xlsx` must raise a
    ValueError whose message matches the pattern below, i.e. it names the
    offending column ('unnamed: 7') in the "Empty cells" error.
    """
    # raw string: `match` is a regular expression, so the literal parentheses
    # are escaped for the regex engine; a non-raw string would contain the
    # invalid escape sequences `\(` and `\)`
    match = r"Empty cells in `data` \(columns: 'unnamed: 7'\):"
    with pytest.raises(ValueError, match=match):
        IamDataFrame(TEST_DATA_DIR / "na_column.xlsx")


@pytest.mark.parametrize(
"sheet_name, init_args, rename",
[
Expand Down

0 comments on commit 7826d18

Please sign in to comment.