diff --git a/audb/core/api.py b/audb/core/api.py index 1083e32e..a2720273 100644 --- a/audb/core/api.py +++ b/audb/core/api.py @@ -271,7 +271,7 @@ def dependencies( try: deps = Dependencies() deps.load(cached_deps_file) - except (AttributeError, EOFError, FileNotFoundError, KeyError, ValueError): + except Exception: # does not catch KeyboardInterrupt # If loading cached file fails, load again from backend backend_interface = utils.lookup_backend(name, version) deps = download_dependencies(backend_interface, name, version, verbose) diff --git a/tests/assests/dependency-table-pandas/.gitignore b/tests/assests/dependency-table-pandas/.gitignore new file mode 100644 index 00000000..1558c17b --- /dev/null +++ b/tests/assests/dependency-table-pandas/.gitignore @@ -0,0 +1,2 @@ +venv/ +db.csv diff --git a/tests/assests/dependency-table-pandas/README.md b/tests/assests/dependency-table-pandas/README.md new file mode 100644 index 00000000..6888c790 --- /dev/null +++ b/tests/assests/dependency-table-pandas/README.md @@ -0,0 +1,29 @@ +# Dependency table pandas compatibility + +Since version 1.7.0 of `audb`, +we use `pyarrow` dtypes +inside the dependency table +(`audb.Dependencies._df`). +The dependency table +is still stored in cache +as a pickle file. +When loading the pickle file +with a different `pandas` version +than the one used to store the file, +an error related to the `pyarrow` dtypes +might be raised. + +To test this, +we store an example dependency table +from the `emodb` dataset +as a pickle file +using different `pandas` versions +as test assets. 
def main(pandas_version):
    """Store the emodb dependency table as a pickle file.

    The ``db.zip`` archive of emodb version 1.4.1
    is requested from the ``data-public`` repository
    with the current working directory as destination.
    The file ``db.csv`` is then loaded as dependency table
    and saved again as ``emodb-pandas-<pandas_version>.pkl``.

    Args:
        pandas_version: version of the installed ``pandas`` package,
            only used to build the name of the output file

    """
    interface = audb.Repository(
        "data-public",
        "https://audeering.jfrog.io/artifactory",
        "artifactory",
    ).create_backend_interface()

    # Version 1.4.1 of emodb
    # still provides its dependency table as CSV file
    archive = interface.join("/", "emodb", "db.zip")
    with interface.backend:
        interface.get_archive(archive, ".", "1.4.1", verbose=False)

    table = audb.Dependencies()
    table.load("db.csv")
    table.save(f"emodb-pandas-{pandas_version}.pkl")
def _version_tuple(version):
    """Convert a version string into a tuple of integers.

    Only the leading digits of the first three components are used,
    e.g. ``"2.2.0rc1"`` is converted to ``(2, 2, 0)``.
    Comparing the resulting tuples orders versions numerically,
    whereas comparing the raw strings orders them lexicographically
    (``"10.0.0" >= "2.1.0"`` is ``False``).

    Args:
        version: version string, e.g. ``"2.1.4"``

    Returns:
        tuple with up to three integers

    """
    numbers = []
    for component in version.split(".")[:3]:
        digits = re.match(r"\d+", component)
        if digits is None:
            # Stop at the first component without a leading number
            break
        numbers.append(int(digits.group()))
    return tuple(numbers)


@pytest.mark.parametrize("pandas_version", ["2.0.3", "2.1.4", "2.2.2"])
def test_load_save_pandas_compatibility(pandas_version):
    """Test pandas backward compatibility of pickle cache files.

    Dataframes using pyarrow dtypes,
    and stored as pickle files
    might fail to load
    if the used ``pandas`` version
    does not match.

    Test which ``pandas`` versions raise errors
    when loading deps from pickle files.
    We have to catch those errors inside
    ``audb.dependencies()``.

    See Also:
        https://github.com/audeering/audb/issues/418

    Args:
        pandas_version: the version of ``pandas``
            used to store the dependency table in cache

    """
    deps_file = audeer.path(
        CURRENT_DIR,
        "assests",
        "dependency-table-pandas",
        f"emodb-pandas-{pandas_version}.pkl",
    )
    deps = audb.Dependencies()

    # Compare versions as integer tuples,
    # as string comparison is lexicographic
    # and would order "10.0.0" before "2.1.0"
    installed = _version_tuple(pd.__version__)
    stored = _version_tuple(pandas_version)

    # Dependency table cached with pandas==2.0.3.
    # Loading with pandas>=2.1.0 leads to a ModuleNotFoundError
    if installed >= (2, 1, 0) and stored == (2, 0, 3):
        error_msg = "No module named 'pandas.core.arrays.arrow.dtype'"
        with pytest.raises(ModuleNotFoundError, match=error_msg):
            deps.load(deps_file)

    # Dependency table cached with pandas>=2.1.4.
    # Loading with pandas==2.0.3 leads to a KeyError
    elif installed == (2, 0, 3) and stored >= (2, 1, 4):
        error_msg = "'_data'"
        with pytest.raises(KeyError, match=error_msg):
            deps.load(deps_file)

    else:
        deps.load(deps_file)
        assert deps._df.index.dtype == audb.core.define.DEPEND_INDEX_DTYPE