Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Specify datetime format in tests #1905

Merged
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
48 changes: 44 additions & 4 deletions tiledb/tests/test_pandas_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import random
import string
import sys
import uuid

import numpy as np
Expand Down Expand Up @@ -380,6 +381,10 @@ def test_dataframe_categorical(self):
with tiledb.open(uri) as B:
tm.assert_frame_equal(df, B.df[:])

@pytest.mark.skipif(
sys.version_info < (3, 8),
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
)
def test_dataframe_csv_rt1(self):
def rand_dtype(dtype, size):
nbytes = size * np.dtype(dtype).itemsize
Expand Down Expand Up @@ -409,7 +414,12 @@ def rand_dtype(dtype, size):

csv_array_uri = os.path.join(uri, "tiledb_csv")
tiledb.from_csv(
csv_array_uri, csv_uri, index_col=0, parse_dates=[1], sparse=False
csv_array_uri,
csv_uri,
index_col=0,
parse_dates=[1],
date_format="%Y-%m-%d %H:%M:%S.%f",
sparse=False,
)

df_from_array = tiledb.open_dataframe(csv_array_uri)
Expand All @@ -420,7 +430,12 @@ def rand_dtype(dtype, size):
with tiledb.FileIO(tiledb.VFS(), csv_uri, "rb") as fio:
csv_array_uri2 = os.path.join(csv_array_uri + "_2")
tiledb.from_csv(
csv_array_uri2, csv_uri, index_col=0, parse_dates=[1], sparse=False
csv_array_uri2,
csv_uri,
index_col=0,
parse_dates=[1],
sparse=False,
date_format="%Y-%m-%d %H:%M:%S.%f",
)

df_from_array2 = tiledb.open_dataframe(csv_array_uri2)
Expand Down Expand Up @@ -677,6 +692,10 @@ def test_csv_dense(self):
tmp_array2 = os.path.join(tmp_dir, "array2")
tiledb.from_csv(tmp_array2, tmp_csv, sparse=False)

@pytest.mark.skipif(
sys.version_info < (3, 8),
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
)
def test_csv_col_to_sparse_dims(self):
df = make_dataframe_basic3(20)

Expand All @@ -697,6 +716,7 @@ def test_csv_col_to_sparse_dims(self):
sparse=True,
index_col=["time", "double_range"],
parse_dates=["time"],
date_format="%Y-%m-%d %H:%M:%S.%f",
)

df_bk = tiledb.open_dataframe(tmp_array)
Expand Down Expand Up @@ -734,6 +754,7 @@ def test_csv_col_to_sparse_dims(self):
tmp_csv2,
index_col=["int_vals"],
parse_dates=["time"],
date_format="%Y-%m-%d %H:%M:%S.%f",
sparse=True,
allows_duplicates=True,
float_precision="round_trip",
Expand All @@ -748,6 +769,10 @@ def test_csv_col_to_sparse_dims(self):
cmp_df = df.set_index("int_vals").sort_values(by="time")
tm.assert_frame_equal(res_df, cmp_df)

@pytest.mark.skipif(
sys.version_info < (3, 8),
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
)
def test_dataframe_csv_schema_only(self):
col_size = 10
df = make_dataframe_basic3(col_size)
Expand Down Expand Up @@ -784,6 +809,7 @@ def test_dataframe_csv_schema_only(self):
tmp_csv,
index_col=["time", "double_range"],
parse_dates=["time"],
date_format="%Y-%m-%d %H:%M:%S.%f",
mode="schema_only",
capacity=1001,
sparse=True,
Expand Down Expand Up @@ -856,6 +882,10 @@ def test_dataframe_csv_schema_only(self):
df_bk.sort_index(level="time", inplace=True)
tm.assert_frame_equal(df_bk, df_combined)

@pytest.mark.skipif(
sys.version_info < (3, 8),
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
)
def test_dataframe_csv_chunked(self):
col_size = 200
df = make_dataframe_basic3(col_size)
Expand All @@ -876,7 +906,7 @@ def test_dataframe_csv_chunked(self):
tmp_csv,
index_col=["double_range"],
parse_dates=["time"],
date_spec={"time": "%Y-%m-%dT%H:%M:%S.%f"},
date_format="%Y-%m-%d %H:%M:%S.%f",
chunksize=10,
sparse=True,
quotechar='"',
Expand All @@ -893,7 +923,12 @@ def test_dataframe_csv_chunked(self):
# Test dense chunked
tmp_array_dense = os.path.join(tmp_dir, "array_dense")
tiledb.from_csv(
tmp_array_dense, tmp_csv, parse_dates=["time"], sparse=False, chunksize=25
tmp_array_dense,
tmp_csv,
parse_dates=["time"],
date_format="%Y-%m-%d %H:%M:%S.%f",
sparse=False,
chunksize=25,
)

with tiledb.open(tmp_array_dense) as A:
Expand Down Expand Up @@ -933,6 +968,10 @@ def test_dataframe_csv_chunked(self):
df_idx_res = A.query(coords=False).df[int(ned[0]) : int(ned[1])]
tm.assert_frame_equal(df_idx_res, df.reset_index(drop=True))

@pytest.mark.skipif(
sys.version_info < (3, 8),
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
)
def test_csv_fillna(self):
if pytest.tiledb_vfs == "s3":
pytest.skip(
Expand Down Expand Up @@ -1016,6 +1055,7 @@ def check_array(path, df):
csv_paths,
index_col=["time"],
parse_dates=["time"],
date_format="%Y-%m-%d %H:%M:%S.%f",
chunksize=25,
sparse=True,
)
Expand Down
Loading