From b0ee769975a683d0aafdbc035de2b29e72b70e02 Mon Sep 17 00:00:00 2001
From: Hagen Wierstorf
Date: Tue, 18 Jun 2024 13:17:20 +0200
Subject: [PATCH] Ensure correct order in time when storing tables

When a table is saved with update_other_formats=True,
every already existing file representation is rewritten
in the fixed order PARQUET, CSV, PKL,
so that the PKL file always carries the newest modification time.
Before, saving as PKL updated either the PARQUET or the CSV file,
and saving as PARQUET or CSV never updated the other text format.

A test is added that checks the resulting modification times
for all combinations of requested and existing formats.

---
 audformat/core/table.py | 17 ++++++---
 tests/test_table.py     | 79 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 4 deletions(-)

diff --git a/audformat/core/table.py b/audformat/core/table.py
index bc66b943..995e3bfc 100644
--- a/audformat/core/table.py
+++ b/audformat/core/table.py
@@ -599,25 +599,34 @@ def save(
         path = audeer.path(path)
         define.TableStorageFormat._assert_has_attribute_value(storage_format)
 
+        csv_file = f"{path}.{define.TableStorageFormat.CSV}"
         parquet_file = f"{path}.{define.TableStorageFormat.PARQUET}"
         pickle_file = f"{path}.{define.TableStorageFormat.PICKLE}"
-        csv_file = f"{path}.{define.TableStorageFormat.CSV}"
 
-        # Make sure the CSV|PARQUET file is always written first
-        # as it is expected to be older by load()
+        # Ensure the following storage order:
+        # 1. PARQUET file
+        # 2. CSV file
+        # 3. PKL file
+        # The PKL is expected to be the newest by load(),
+        # the order of PARQUET and CSV file
+        # is only a convention for now.
         if storage_format == define.TableStorageFormat.PICKLE:
             if update_other_formats and os.path.exists(parquet_file):
                 self._save_parquet(parquet_file)
-            elif update_other_formats and os.path.exists(csv_file):
+            if update_other_formats and os.path.exists(csv_file):
                 self._save_csv(csv_file)
             self._save_pickled(pickle_file)
 
         if storage_format == define.TableStorageFormat.PARQUET:
             self._save_parquet(parquet_file)
+            if update_other_formats and os.path.exists(csv_file):
+                self._save_csv(csv_file)
             if update_other_formats and os.path.exists(pickle_file):
                 self._save_pickled(pickle_file)
 
         if storage_format == define.TableStorageFormat.CSV:
+            if update_other_formats and os.path.exists(parquet_file):
+                self._save_parquet(parquet_file)
             self._save_csv(csv_file)
             if update_other_formats and os.path.exists(pickle_file):
                 self._save_pickled(pickle_file)
diff --git a/tests/test_table.py b/tests/test_table.py
index 49ae61a6..c2fdcadb 100644
--- a/tests/test_table.py
+++ b/tests/test_table.py
@@ -1,6 +1,7 @@
 import os
 import random
 import re
+import time
 import typing
 
 import numpy as np
@@ -2122,3 +2123,81 @@ def test_update(table, overwrite, others):
     for column_id, column in other.columns.items():
         assert column.scheme == table[column_id].scheme
         assert column.rater == table[column_id].rater
+
+
+@pytest.mark.parametrize("update_other_formats", [True, False])
+@pytest.mark.parametrize(
+    "storage_format, existing_formats",
+    [
+        ("csv", []),
+        ("csv", ["parquet"]),
+        ("csv", ["pkl"]),
+        ("csv", ["parquet", "pkl"]),
+        ("pkl", ["parquet"]),
+        ("pkl", ["csv"]),
+        ("pkl", ["parquet", "csv"]),
+        ("parquet", ["pkl"]),
+        ("parquet", ["csv"]),
+        ("parquet", ["pkl", "csv"]),
+    ],
+)
+def test_update_other_formats(
+    tmpdir,
+    storage_format,
+    existing_formats,
+    update_other_formats,
+):
+    r"""Tests updating of other table formats.
+
+    When a table is stored with `audformat.Table.save()`
+    as CSV, PARQUET, or PKL file,
+    a user might select
+    that all other existing file representations of the table
+    are updated as well.
+    E.g. if a PKL file of the same table exists,
+    and a user saves to a CSV file
+    with the argument `update_other_formats=True`,
+    it should write the table to the CSV and PKL file.
+
+    """
+    db = audformat.testing.create_db()
+
+    table_id = "files"
+    table_file = audeer.path(tmpdir, "table")
+
+    # Create existing table files and pause for a short time
+    old_mtime = {}
+    for ext in existing_formats:
+        db[table_id].save(
+            table_file,
+            storage_format=ext,
+            update_other_formats=False,
+        )
+        old_mtime[ext] = os.path.getmtime(f"{table_file}.{ext}")
+    time.sleep(0.05)
+
+    # Store table to requested format
+    db[table_id].save(
+        table_file,
+        storage_format=storage_format,
+        update_other_formats=update_other_formats,
+    )
+
+    # Collect mtimes of existing table files
+    mtime = {}
+    formats = existing_formats + [storage_format]
+    for ext in formats:
+        mtime[ext] = os.path.getmtime(f"{table_file}.{ext}")
+
+    # Ensure mtimes are correct
+    if update_other_formats:
+        if "pkl" in formats and "csv" in formats:
+            assert mtime["pkl"] >= mtime["csv"]
+        if "pkl" in formats and "parquet" in formats:
+            assert mtime["pkl"] >= mtime["parquet"]
+        if "csv" in formats and "parquet" in formats:
+            assert mtime["csv"] >= mtime["parquet"]
+    else:
+        for ext in existing_formats:
+            assert mtime[ext] == old_mtime[ext]
+            assert mtime[storage_format] > old_mtime[ext]