From 55d7b3ad757c0ddc9df2f44f3d2cd1c781f39a39 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Fri, 1 Mar 2024 20:18:52 +0200 Subject: [PATCH] Specify datetime format in tests (#1905) * Specify datetime format in tests * skip tests containing date_format argument for Python 3.7 and below --- fixes https://app.shortcut.com/tiledb-inc/story/34107 --- tiledb/tests/test_pandas_dataframe.py | 48 ++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/tiledb/tests/test_pandas_dataframe.py b/tiledb/tests/test_pandas_dataframe.py index 32efa1319a..51649e7bfc 100644 --- a/tiledb/tests/test_pandas_dataframe.py +++ b/tiledb/tests/test_pandas_dataframe.py @@ -3,6 +3,7 @@ import os import random import string +import sys import uuid import numpy as np @@ -380,6 +381,10 @@ def test_dataframe_categorical(self): with tiledb.open(uri) as B: tm.assert_frame_equal(df, B.df[:]) + @pytest.mark.skipif( + sys.version_info < (3, 8), + reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below", + ) def test_dataframe_csv_rt1(self): def rand_dtype(dtype, size): nbytes = size * np.dtype(dtype).itemsize @@ -409,7 +414,12 @@ def rand_dtype(dtype, size): csv_array_uri = os.path.join(uri, "tiledb_csv") tiledb.from_csv( - csv_array_uri, csv_uri, index_col=0, parse_dates=[1], sparse=False + csv_array_uri, + csv_uri, + index_col=0, + parse_dates=[1], + date_format="%Y-%m-%d %H:%M:%S.%f", + sparse=False, ) df_from_array = tiledb.open_dataframe(csv_array_uri) @@ -420,7 +430,12 @@ def rand_dtype(dtype, size): with tiledb.FileIO(tiledb.VFS(), csv_uri, "rb") as fio: csv_array_uri2 = os.path.join(csv_array_uri + "_2") tiledb.from_csv( - csv_array_uri2, csv_uri, index_col=0, parse_dates=[1], sparse=False + csv_array_uri2, + csv_uri, + index_col=0, + parse_dates=[1], + sparse=False, + date_format="%Y-%m-%d %H:%M:%S.%f", ) df_from_array2 = tiledb.open_dataframe(csv_array_uri2) @@ -677,6 +692,10 @@ def test_csv_dense(self): tmp_array2 = os.path.join(tmp_dir, "array2") tiledb.from_csv(tmp_array2, tmp_csv, sparse=False) + @pytest.mark.skipif( + sys.version_info < (3, 8), + reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below", + ) def test_csv_col_to_sparse_dims(self): df = make_dataframe_basic3(20) @@ -697,6 +716,7 @@ def test_csv_col_to_sparse_dims(self): sparse=True, index_col=["time", "double_range"], parse_dates=["time"], + date_format="%Y-%m-%d %H:%M:%S.%f", ) df_bk = tiledb.open_dataframe(tmp_array) @@ -734,6 +754,7 @@ def test_csv_col_to_sparse_dims(self): tmp_csv2, index_col=["int_vals"], parse_dates=["time"], + date_format="%Y-%m-%d %H:%M:%S.%f", sparse=True, allows_duplicates=True, float_precision="round_trip", @@ -748,6 +769,10 @@ def test_csv_col_to_sparse_dims(self): cmp_df = df.set_index("int_vals").sort_values(by="time") tm.assert_frame_equal(res_df, cmp_df) + @pytest.mark.skipif( + sys.version_info < (3, 8), + reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below", + ) def test_dataframe_csv_schema_only(self): col_size = 10 df = make_dataframe_basic3(col_size) @@ -784,6 +809,7 @@ def test_dataframe_csv_schema_only(self): tmp_csv, index_col=["time", "double_range"], parse_dates=["time"], + date_format="%Y-%m-%d %H:%M:%S.%f", mode="schema_only", capacity=1001, sparse=True, @@ -856,6 +882,10 @@ def test_dataframe_csv_schema_only(self): df_bk.sort_index(level="time", inplace=True) tm.assert_frame_equal(df_bk, df_combined) + @pytest.mark.skipif( + sys.version_info < (3, 8), + reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below", + ) def test_dataframe_csv_chunked(self): col_size = 200 df = make_dataframe_basic3(col_size) @@ -876,7 +906,7 @@ def test_dataframe_csv_chunked(self): tmp_csv, index_col=["double_range"], parse_dates=["time"], - date_spec={"time": "%Y-%m-%dT%H:%M:%S.%f"}, + date_format="%Y-%m-%d %H:%M:%S.%f", chunksize=10, sparse=True, quotechar='"', @@ -893,7 +923,12 @@ def test_dataframe_csv_chunked(self): # Test dense chunked tmp_array_dense = os.path.join(tmp_dir, "array_dense") tiledb.from_csv( - tmp_array_dense, tmp_csv, parse_dates=["time"], sparse=False, chunksize=25 + tmp_array_dense, + tmp_csv, + parse_dates=["time"], + date_format="%Y-%m-%d %H:%M:%S.%f", + sparse=False, + chunksize=25, ) with tiledb.open(tmp_array_dense) as A: @@ -933,6 +968,10 @@ def test_dataframe_csv_chunked(self): df_idx_res = A.query(coords=False).df[int(ned[0]) : int(ned[1])] tm.assert_frame_equal(df_idx_res, df.reset_index(drop=True)) + @pytest.mark.skipif( + sys.version_info < (3, 8), + reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below", + ) def test_csv_fillna(self): if pytest.tiledb_vfs == "s3": pytest.skip( @@ -1016,6 +1055,7 @@ def check_array(path, df): csv_paths, index_col=["time"], parse_dates=["time"], + date_format="%Y-%m-%d %H:%M:%S.%f", chunksize=25, sparse=True, )