Skip to content

Commit

Permalink
Specify datetime format in tests (#1905)
Browse files Browse the repository at this point in the history
* Specify datetime format in tests

* Skip tests that pass the date_format argument on Python 3.7 and below

---
fixes https://app.shortcut.com/tiledb-inc/story/34107
  • Loading branch information
kounelisagis authored Mar 1, 2024
1 parent 3615631 commit 55d7b3a
Showing 1 changed file with 44 additions and 4 deletions.
48 changes: 44 additions & 4 deletions tiledb/tests/test_pandas_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import random
import string
import sys
import uuid

import numpy as np
Expand Down Expand Up @@ -380,6 +381,10 @@ def test_dataframe_categorical(self):
with tiledb.open(uri) as B:
tm.assert_frame_equal(df, B.df[:])

@pytest.mark.skipif(
sys.version_info < (3, 8),
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
)
def test_dataframe_csv_rt1(self):
def rand_dtype(dtype, size):
nbytes = size * np.dtype(dtype).itemsize
Expand Down Expand Up @@ -409,7 +414,12 @@ def rand_dtype(dtype, size):

csv_array_uri = os.path.join(uri, "tiledb_csv")
tiledb.from_csv(
csv_array_uri, csv_uri, index_col=0, parse_dates=[1], sparse=False
csv_array_uri,
csv_uri,
index_col=0,
parse_dates=[1],
date_format="%Y-%m-%d %H:%M:%S.%f",
sparse=False,
)

df_from_array = tiledb.open_dataframe(csv_array_uri)
Expand All @@ -420,7 +430,12 @@ def rand_dtype(dtype, size):
with tiledb.FileIO(tiledb.VFS(), csv_uri, "rb") as fio:
csv_array_uri2 = os.path.join(csv_array_uri + "_2")
tiledb.from_csv(
csv_array_uri2, csv_uri, index_col=0, parse_dates=[1], sparse=False
csv_array_uri2,
csv_uri,
index_col=0,
parse_dates=[1],
sparse=False,
date_format="%Y-%m-%d %H:%M:%S.%f",
)

df_from_array2 = tiledb.open_dataframe(csv_array_uri2)
Expand Down Expand Up @@ -677,6 +692,10 @@ def test_csv_dense(self):
tmp_array2 = os.path.join(tmp_dir, "array2")
tiledb.from_csv(tmp_array2, tmp_csv, sparse=False)

@pytest.mark.skipif(
sys.version_info < (3, 8),
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
)
def test_csv_col_to_sparse_dims(self):
df = make_dataframe_basic3(20)

Expand All @@ -697,6 +716,7 @@ def test_csv_col_to_sparse_dims(self):
sparse=True,
index_col=["time", "double_range"],
parse_dates=["time"],
date_format="%Y-%m-%d %H:%M:%S.%f",
)

df_bk = tiledb.open_dataframe(tmp_array)
Expand Down Expand Up @@ -734,6 +754,7 @@ def test_csv_col_to_sparse_dims(self):
tmp_csv2,
index_col=["int_vals"],
parse_dates=["time"],
date_format="%Y-%m-%d %H:%M:%S.%f",
sparse=True,
allows_duplicates=True,
float_precision="round_trip",
Expand All @@ -748,6 +769,10 @@ def test_csv_col_to_sparse_dims(self):
cmp_df = df.set_index("int_vals").sort_values(by="time")
tm.assert_frame_equal(res_df, cmp_df)

@pytest.mark.skipif(
sys.version_info < (3, 8),
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
)
def test_dataframe_csv_schema_only(self):
col_size = 10
df = make_dataframe_basic3(col_size)
Expand Down Expand Up @@ -784,6 +809,7 @@ def test_dataframe_csv_schema_only(self):
tmp_csv,
index_col=["time", "double_range"],
parse_dates=["time"],
date_format="%Y-%m-%d %H:%M:%S.%f",
mode="schema_only",
capacity=1001,
sparse=True,
Expand Down Expand Up @@ -856,6 +882,10 @@ def test_dataframe_csv_schema_only(self):
df_bk.sort_index(level="time", inplace=True)
tm.assert_frame_equal(df_bk, df_combined)

@pytest.mark.skipif(
sys.version_info < (3, 8),
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
)
def test_dataframe_csv_chunked(self):
col_size = 200
df = make_dataframe_basic3(col_size)
Expand All @@ -876,7 +906,7 @@ def test_dataframe_csv_chunked(self):
tmp_csv,
index_col=["double_range"],
parse_dates=["time"],
date_spec={"time": "%Y-%m-%dT%H:%M:%S.%f"},
date_format="%Y-%m-%d %H:%M:%S.%f",
chunksize=10,
sparse=True,
quotechar='"',
Expand All @@ -893,7 +923,12 @@ def test_dataframe_csv_chunked(self):
# Test dense chunked
tmp_array_dense = os.path.join(tmp_dir, "array_dense")
tiledb.from_csv(
tmp_array_dense, tmp_csv, parse_dates=["time"], sparse=False, chunksize=25
tmp_array_dense,
tmp_csv,
parse_dates=["time"],
date_format="%Y-%m-%d %H:%M:%S.%f",
sparse=False,
chunksize=25,
)

with tiledb.open(tmp_array_dense) as A:
Expand Down Expand Up @@ -933,6 +968,10 @@ def test_dataframe_csv_chunked(self):
df_idx_res = A.query(coords=False).df[int(ned[0]) : int(ned[1])]
tm.assert_frame_equal(df_idx_res, df.reset_index(drop=True))

@pytest.mark.skipif(
sys.version_info < (3, 8),
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
)
def test_csv_fillna(self):
if pytest.tiledb_vfs == "s3":
pytest.skip(
Expand Down Expand Up @@ -1016,6 +1055,7 @@ def check_array(path, df):
csv_paths,
index_col=["time"],
parse_dates=["time"],
date_format="%Y-%m-%d %H:%M:%S.%f",
chunksize=25,
sparse=True,
)
Expand Down

0 comments on commit 55d7b3a

Please sign in to comment.