Skip to content

Commit

Permalink
Ensure correct order in time when storing tables
Browse files Browse the repository at this point in the history
  • Loading branch information
hagenw committed Jun 18, 2024
1 parent 589da4b commit b0ee769
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 4 deletions.
17 changes: 13 additions & 4 deletions audformat/core/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,25 +599,34 @@ def save(
path = audeer.path(path)
define.TableStorageFormat._assert_has_attribute_value(storage_format)

csv_file = f"{path}.{define.TableStorageFormat.CSV}"
parquet_file = f"{path}.{define.TableStorageFormat.PARQUET}"
pickle_file = f"{path}.{define.TableStorageFormat.PICKLE}"
csv_file = f"{path}.{define.TableStorageFormat.CSV}"

# Make sure the CSV|PARQUET file is always written first
# as it is expected to be older by load()
# Ensure the following storage order:
# 1. PARQUET file
# 2. CSV file
# 3. PKL file
# The PKL file is expected to be the newest by load(),
# the order of PARQUET and CSV file
# is only a convention for now.
if storage_format == define.TableStorageFormat.PICKLE:
if update_other_formats and os.path.exists(parquet_file):
self._save_parquet(parquet_file)
elif update_other_formats and os.path.exists(csv_file):
if update_other_formats and os.path.exists(csv_file):
self._save_csv(csv_file)
self._save_pickled(pickle_file)

if storage_format == define.TableStorageFormat.PARQUET:
self._save_parquet(parquet_file)
if update_other_formats and os.path.exists(csv_file):
self._save_csv(csv_file)
if update_other_formats and os.path.exists(pickle_file):
self._save_pickled(pickle_file)

if storage_format == define.TableStorageFormat.CSV:
if update_other_formats and os.path.exists(parquet_file):
self._save_parquet(parquet_file)
self._save_csv(csv_file)
if update_other_formats and os.path.exists(pickle_file):
self._save_pickled(pickle_file)
Expand Down
79 changes: 79 additions & 0 deletions tests/test_table.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import random
import re
import time
import typing

import numpy as np
Expand Down Expand Up @@ -2122,3 +2123,81 @@ def test_update(table, overwrite, others):
for column_id, column in other.columns.items():
assert column.scheme == table[column_id].scheme
assert column.rater == table[column_id].rater


@pytest.mark.parametrize("update_other_formats", [True, False])
@pytest.mark.parametrize(
    "storage_format, existing_formats",
    [
        ("csv", []),
        ("csv", ["parquet"]),
        ("csv", ["pkl"]),
        ("csv", ["parquet", "pkl"]),
        ("pkl", ["parquet"]),
        ("pkl", ["csv"]),
        ("pkl", ["parquet", "csv"]),
        ("parquet", ["pkl"]),
        ("parquet", ["csv"]),
        ("parquet", ["pkl", "csv"]),
    ],
)
def test_update_other_formats(
    tmpdir,
    storage_format,
    existing_formats,
    update_other_formats,
):
    r"""Tests updating of other table formats.

    When a table is stored with `audformat.Table.save()`
    as CSV, PARQUET, or PKL file,
    a user might select
    that all other existing file representations of the table
    are updated as well.
    E.g. if a PKL file of the same table exists,
    and a user saves to a CSV file
    with the argument `update_other_formats=True`,
    it should write the table to the CSV and PKL file.

    Args:
        tmpdir: tmpdir fixture
        storage_format: storage format
            the table is finally saved with,
            given as file extension
            (``"csv"``, ``"parquet"``, or ``"pkl"``)
        existing_formats: storage formats (file extensions)
            of table files
            that are created before the final save
        update_other_formats: value passed on
            as ``update_other_formats`` argument
            to the final save

    """
    db = audformat.testing.create_db()

    table_id = "files"
    table_file = audeer.path(tmpdir, "table")

    # Create existing table files and pause for a short time,
    # so that a later write results in a different mtime
    old_mtime = {}
    for ext in existing_formats:
        db[table_id].save(
            table_file,
            storage_format=ext,
            update_other_formats=False,
        )
        old_mtime[ext] = os.path.getmtime(f"{table_file}.{ext}")
        time.sleep(0.05)

    # Store table to requested format
    db[table_id].save(
        table_file,
        storage_format=storage_format,
        update_other_formats=update_other_formats,
    )

    # Collect mtimes of existing table files
    mtime = {}
    formats = existing_formats + [storage_format]
    for ext in formats:
        mtime[ext] = os.path.getmtime(f"{table_file}.{ext}")

    # Ensure mtimes are correct
    if update_other_formats:
        # The keys of `mtime` are the file extensions
        # ("csv", "parquet", "pkl"),
        # so the pickle file has to be addressed by "pkl".
        # Files are expected to be written
        # in the order PARQUET, CSV, PKL.
        # `>=` is used as files written directly after each other
        # might end up with identical timestamps
        # on file systems with coarse mtime resolution.
        if "pkl" in formats and "csv" in formats:
            assert mtime["pkl"] >= mtime["csv"]
        if "pkl" in formats and "parquet" in formats:
            assert mtime["pkl"] >= mtime["parquet"]
        if "csv" in formats and "parquet" in formats:
            assert mtime["csv"] >= mtime["parquet"]
    else:
        # Existing files must stay untouched,
        # and the requested file must be newer than all of them
        for ext in existing_formats:
            assert mtime[ext] == old_mtime[ext]
            assert mtime[storage_format] > old_mtime[ext]

0 comments on commit b0ee769

Please sign in to comment.