Better error message when initializing with invisible columns (IAMconsortium#634)

Co-authored-by: Philip Hackstock <[email protected]>
LauWien · Mar 2, 2022 · 7826d18 · 7826d18
1 parent 38897ff
commit 7826d18
Show file tree

Hide file tree

Showing 6 changed files with 38 additions and 20 deletions.
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
@@ -10,6 +10,7 @@ an empty **IamDataFrame**. Previously, this raised an error.
 
 ## Individual updates
 
+- [#634](https://github.com/IAMconsortium/pyam/pull/634) Better error message when initializing with invisible columns 
 - [#598](https://github.com/IAMconsortium/pyam/pull/598) Support mixed 'year' and 'datetime' domain
 
 # Release v1.3.1

diff --git a/pyam/core.py b/pyam/core.py
@@ -10,6 +10,7 @@
 from pandas.api.types import is_integer
 
 from pathlib import Path
+from py._path.local import LocalPath
 from tempfile import TemporaryDirectory
 
 from pyam.filter import filter_by_time_domain, filter_by_year, filter_by_dt_arg
@@ -161,27 +162,28 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs):
         # cast data from pandas
         if isinstance(data, pd.DataFrame) or isinstance(data, pd.Series):
             _data = format_data(data.copy(), index=index, **kwargs)
+
         # read data from ixmp Platform instance
         elif has_ix and isinstance(data, ixmp.TimeSeries):
             # TODO read meta indicators from ixmp
             _data = read_ix(data, **kwargs)
+
+        # read from file
+        elif isinstance(data, (str, LocalPath, Path)):
+            data = Path(data)  # casting str or LocalPath to Path
+            if not data.is_file():
+                raise FileNotFoundError(f"No such file: '{data}'")
+            logger.info(f"Reading file {data}")
+            _data = read_file(data, index=index, **kwargs)
+
+        # unsupported `data` args
+        elif islistable(data):
+            raise ValueError(
+                "Initializing from list is not supported, "
+                "use `IamDataFrame.append()` or `pyam.concat()`"
+            )
         else:
-            if islistable(data):
-                raise ValueError(
-                    "Initializing from list is not supported, "
-                    "use `IamDataFrame.append()` or `pyam.concat()`"
-                )
-            # read from file
-            try:
-                data = Path(data)  # casting str or LocalPath to Path
-                if data.is_file():
-                    logger.info(f"Reading file {data}")
-                    _data = read_file(data, index=index, **kwargs)
-                else:
-                    raise FileNotFoundError(f"File {data} does not exist")
-            except TypeError:  # `data` cannot be cast to Path
-                msg = "IamDataFrame constructor not properly called!"
-                raise ValueError(msg)
+            raise ValueError("IamDataFrame constructor not properly called!")
 
         self._data, index, self.time_col, self.extra_cols = _data
 

diff --git a/pyam/utils.py b/pyam/utils.py
@@ -170,8 +170,11 @@ def read_pandas(path, sheet_name="data*", *args, **kwargs):
         # remove unnamed and empty columns, and rows where all values are nan
         def is_empty(name, s):
             if str(name).startswith("Unnamed: "):
-                if len(s) == 0 or all(np.isnan(s)):
-                    return True
+                try:
+                    if len(s) == 0 or all(np.isnan(s)):
+                        return True
+                except TypeError:
+                    pass
             return False
 
         empty_cols = [c for c in df.columns if is_empty(c, df[c])]
@@ -351,7 +354,10 @@ def convert_r_columns(c):
     # verify that there are no nan's left (in columns)
     null_rows = df.isnull().T.any()
     if null_rows.any():
-        raise_data_error("Empty cells in `data`", df.loc[null_rows])
+        cols = ", ".join(df.columns[df.isnull().any().values])
+        raise_data_error(
+            f"Empty cells in `data` (columns: '{cols}')", df.loc[null_rows]
+        )
     del null_rows
 
     # cast to pd.Series, check for duplicates

diff --git a/setup.cfg b/setup.cfg
@@ -40,6 +40,8 @@ install_requires =
     setuptools_scm
     # required explicitly for Python 3.7
     importlib_metadata
+    # required explicitly for type-checking
+    py
 setup_requires =
     setuptools >= 41
     setuptools_scm

diff --git a/tests/data/na_column.xlsx b/tests/data/na_column.xlsx
diff --git a/tests/test_io.py b/tests/test_io.py
@@ -28,7 +28,7 @@ def test_unknown_type():
 
 def test_not_a_file():
     # initializing with a file-like that's not a file raises an error
-    match = "File foo.csv does not exist"
+    match = "No such file: 'foo.csv'"
     with pytest.raises(FileNotFoundError, match=match):
         IamDataFrame("foo.csv")
 
@@ -111,6 +111,13 @@ def test_init_df_with_na_unit(test_pd_df, tmpdir):
     IamDataFrame(file)  # reading from file as IamDataFrame works
 
 
+def test_init_df_with_na_column_raises(test_pd_df, tmpdir):
+    # reading a file with a "corrupted" column raises an error naming that column
+    match = r"Empty cells in `data` \(columns: 'unnamed: 7'\):"
+    with pytest.raises(ValueError, match=match):
+        IamDataFrame(TEST_DATA_DIR / "na_column.xlsx")
+
+
 @pytest.mark.parametrize(
     "sheet_name, init_args, rename",
     [