From 9bf66b887ac221480d5f4fb11ef4c59ba6a4b989 Mon Sep 17 00:00:00 2001 From: Ian Spektor Date: Tue, 31 Oct 2023 18:10:22 +0100 Subject: [PATCH 1/9] add missing test for join --- temporian/core/operators/test/test_join.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/temporian/core/operators/test/test_join.py b/temporian/core/operators/test/test_join.py index 2493e228c..3440f04b1 100644 --- a/temporian/core/operators/test/test_join.py +++ b/temporian/core/operators/test/test_join.py @@ -115,6 +115,15 @@ def test_wrong_on_type(self): with self.assertRaisesRegex(ValueError, "Got float64 instead for left"): evset_1.join(evset_2, on="c") + def test_same_sampling(self): + evset_1 = event_set([0], features={"a": [0]}) + evset_2 = event_set([0], features={"b": [0]}, same_sampling_as=evset_1) + with self.assertRaisesRegex( + ValueError, + "Both inputs have the same sampling. Use ", + ): + evset_1.join(evset_2) + if __name__ == "__main__": absltest.main() From 2cf62f36e713d6bb5fa31df6088cfa2d4091f11f Mon Sep 17 00:00:00 2001 From: Ian Spektor Date: Tue, 31 Oct 2023 18:11:58 +0100 Subject: [PATCH 2/9] add missing test for fft --- .../core/operators/test/test_fast_fourier_transform.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/temporian/core/operators/test/test_fast_fourier_transform.py b/temporian/core/operators/test/test_fast_fourier_transform.py index 926fc7775..70c011e6c 100644 --- a/temporian/core/operators/test/test_fast_fourier_transform.py +++ b/temporian/core/operators/test/test_fast_fourier_transform.py @@ -106,6 +106,15 @@ def test_wrong_num_spectral_lines(self): num_events=20, num_spectral_lines=15 ) + def test_wrong_num_spectral_lines_negative(self): + evset = event_set([], {"a": f32([])}) + with self.assertRaisesRegex( + ValueError, "num_spectral_lines should be positive. Got" + ): + evset.experimental_fast_fourier_transform( + num_events=20, num_spectral_lines=-1 + ) + if __name__ == "__main__": absltest.main() From d576dc3a7baec4c1b0b629b2058ef298afecce2b Mon Sep 17 00:00:00 2001 From: Ian Spektor Date: Tue, 31 Oct 2023 18:14:35 +0100 Subject: [PATCH 3/9] add missing tests for begin and end --- temporian/core/operators/test/test_begin.py | 9 +++++++++ temporian/core/operators/test/test_end.py | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/temporian/core/operators/test/test_begin.py b/temporian/core/operators/test/test_begin.py index 3532d5fb2..013cf583a 100644 --- a/temporian/core/operators/test/test_begin.py +++ b/temporian/core/operators/test/test_begin.py @@ -35,6 +35,15 @@ def test_basic(self): assertOperatorResult(self, result, expected, check_sampling=False) + def test_empty(self): + evset = event_set(timestamps=[], features={"a": []}) + + result = evset.end() + + expected = event_set(timestamps=[]) + + assertOperatorResult(self, result, expected, check_sampling=False) + if __name__ == "__main__": absltest.main() diff --git a/temporian/core/operators/test/test_end.py b/temporian/core/operators/test/test_end.py index 97c652ed1..4bddb4b8d 100644 --- a/temporian/core/operators/test/test_end.py +++ b/temporian/core/operators/test/test_end.py @@ -35,6 +35,15 @@ def test_basic(self): assertOperatorResult(self, result, expected, check_sampling=False) + def test_empty(self): + evset = event_set(timestamps=[], features={"a": []}) + + result = evset.end() + + expected = event_set(timestamps=[]) + + assertOperatorResult(self, result, expected, check_sampling=False) + if __name__ == "__main__": absltest.main() From 53557115f836b39b7b4e7587d8657cf9fe96150f Mon Sep 17 00:00:00 2001 From: Ian Spektor Date: Tue, 31 Oct 2023 18:16:01 +0100 Subject: [PATCH 4/9] add missing test for since_last --- temporian/core/operators/test/test_begin.py | 2 +- temporian/core/operators/test/test_since_last.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/temporian/core/operators/test/test_begin.py b/temporian/core/operators/test/test_begin.py index 013cf583a..0f8d7d977 100644 --- a/temporian/core/operators/test/test_begin.py +++ b/temporian/core/operators/test/test_begin.py @@ -38,7 +38,7 @@ def test_basic(self): def test_empty(self): evset = event_set(timestamps=[], features={"a": []}) - result = evset.end() + result = evset.begin() expected = event_set(timestamps=[]) diff --git a/temporian/core/operators/test/test_since_last.py b/temporian/core/operators/test/test_since_last.py index 389db844e..37a4e632a 100644 --- a/temporian/core/operators/test/test_since_last.py +++ b/temporian/core/operators/test/test_since_last.py @@ -97,6 +97,14 @@ def test_with_sampling_2steps(self): assertOperatorResult(self, result, expected) + def test_negative_steps(self): + evset = event_set([]) + + with self.assertRaisesRegex( + ValueError, "Number of steps must be greater than 0. Got" + ): + evset.since_last(steps=-1) + if __name__ == "__main__": absltest.main() From f99599e5dafa23741277622b775a10e2037a187b Mon Sep 17 00:00:00 2001 From: Ian Spektor Date: Tue, 31 Oct 2023 18:27:02 +0100 Subject: [PATCH 5/9] add missing tests for filter --- temporian/core/operators/test/test_filter.py | 27 ++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/temporian/core/operators/test/test_filter.py b/temporian/core/operators/test/test_filter.py index 8e154dde1..ea0a07e57 100644 --- a/temporian/core/operators/test/test_filter.py +++ b/temporian/core/operators/test/test_filter.py @@ -39,6 +39,33 @@ def test_basic(self): assertOperatorResult(self, result, expected, check_sampling=False) + def test_no_condition(self): + evset = event_set([0, 1, 2], features={"cond": [True, False, True]}) + + result = evset.filter() + + expected = event_set([0, 2], features={"cond": [True, True]}) + + assertOperatorResult(self, result, expected, check_sampling=False) + + def test_condition_many_features(self): + evset = event_set([]) + condition = event_set( + [], features={"a": [], "b": []}, same_sampling_as=evset + ) + with self.assertRaisesRegex( + ValueError, "Condition must be a single feature. Got" + ): + evset.filter(condition) + + def test_condition_not_boolean(self): + evset = event_set([]) + condition = event_set([0], features={"a": [3]}, same_sampling_as=evset) + with self.assertRaisesRegex( + ValueError, "Condition must be a boolean feature. Got" + ): + evset.filter(condition) + if __name__ == "__main__": absltest.main() From 0a1c35157bcc2c140bb9d787d36d86b110865eef Mon Sep 17 00:00:00 2001 From: Ian Spektor Date: Tue, 31 Oct 2023 18:33:52 +0100 Subject: [PATCH 6/9] add missing tests for glue --- temporian/core/operators/glue.py | 4 ++-- temporian/core/operators/test/test_glue.py | 14 +++++++++++++- temporian/implementation/numpy/operators/glue.py | 6 +----- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/temporian/core/operators/glue.py b/temporian/core/operators/glue.py index fb4a816a9..08d7825d6 100644 --- a/temporian/core/operators/glue.py +++ b/temporian/core/operators/glue.py @@ -41,11 +41,11 @@ def __init__( # serialization. if len(inputs) < 2: - raise ValueError("At least two arguments should be provided") + raise ValueError("At least two arguments should be provided.") if len(inputs) >= MAX_NUM_ARGUMENTS: raise ValueError( - f"Too many (>{MAX_NUM_ARGUMENTS}) arguments provided" + f"Too many (>{MAX_NUM_ARGUMENTS}) arguments provided." ) # inputs diff --git a/temporian/core/operators/test/test_glue.py b/temporian/core/operators/test/test_glue.py index 87b6aeeb7..c4f0f2de9 100644 --- a/temporian/core/operators/test/test_glue.py +++ b/temporian/core/operators/test/test_glue.py @@ -14,7 +14,7 @@ from absl.testing import absltest from absl.testing.parameterized import TestCase -from temporian.core.operators.glue import glue +from temporian.core.operators.glue import MAX_NUM_ARGUMENTS, glue from temporian.implementation.numpy.data.io import event_set from temporian.test.utils import assertOperatorResult, f32 @@ -89,6 +89,18 @@ def test_duplicate_feature(self): ) glue(evset_1, evset_2) + def test_no_evsets(self): + with self.assertRaisesRegex( + ValueError, + "At least two arguments should be provided.", + ): + glue() + + def test_too_many_evsets(self): + evsets = [event_set([], features={"a": []})] * (MAX_NUM_ARGUMENTS + 1) + with self.assertRaisesRegex(ValueError, "Too many"): + glue(*evsets) + def test_order_unchanged(self): """Tests that input evsets' order is kept. diff --git a/temporian/implementation/numpy/operators/glue.py b/temporian/implementation/numpy/operators/glue.py index d42eb4c0b..8cae0d0d1 100644 --- a/temporian/implementation/numpy/operators/glue.py +++ b/temporian/implementation/numpy/operators/glue.py @@ -14,7 +14,7 @@ """Implementation for the Glue operator.""" -from typing import Dict, List +from typing import Dict from temporian.core.operators.glue import GlueOperator from temporian.implementation.numpy import implementation_lib @@ -38,10 +38,6 @@ def __call__( # Dictionary order is guaranteed evsets = list(inputs.values()) - if len(evsets) < 2: - raise ValueError( - f"Glue operator cannot be called on a {len(evsets)} EventSets." - ) dst_evset = EventSet(data={}, schema=output_schema) for index_key, index_data in evsets[0].data.items(): From 17d1c2e5f4ac89f12c807205fa4ef3299538022b Mon Sep 17 00:00:00 2001 From: Ian Spektor Date: Tue, 31 Oct 2023 18:36:15 +0100 Subject: [PATCH 7/9] fix test for until_next --- temporian/core/operators/test/test_until_next.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/temporian/core/operators/test/test_until_next.py b/temporian/core/operators/test/test_until_next.py index a4ccf9ce7..01842f615 100644 --- a/temporian/core/operators/test/test_until_next.py +++ b/temporian/core/operators/test/test_until_next.py @@ -63,9 +63,9 @@ def test_timeout_non_finite(self): sampling = event_set([]) with self.assertRaisesRegex( - ValueError, "A duration should be a strictly" + ValueError, "Timeout should be finite. Instead, got " ): - evset.until_next(sampling=sampling, timeout=math.nan) + evset.until_next(sampling=sampling, timeout=math.inf) if __name__ == "__main__": From ed6680f55311db0ffcc09c306ecb5fc691ef6b5e Mon Sep 17 00:00:00 2001 From: Ian Spektor Date: Tue, 31 Oct 2023 18:51:05 +0100 Subject: [PATCH 8/9] add missing tests for add_index --- temporian/core/operators/add_index.py | 22 +++----- .../core/operators/test/test_add_index.py | 54 +++++++++++++------ 2 files changed, 44 insertions(+), 32 deletions(-) diff --git a/temporian/core/operators/add_index.py b/temporian/core/operators/add_index.py index 879f92771..fb2c4d45e 100644 --- a/temporian/core/operators/add_index.py +++ b/temporian/core/operators/add_index.py @@ -37,10 +37,10 @@ def __init__( ) -> None: super().__init__() - self._output_feature_schemas = self._get_output_feature_schemas( + output_feature_schemas = self._get_output_feature_schemas( input, indexes ) - self._output_indexes = self._get_output_indexes(input, indexes) + output_indexes = self._get_output_indexes(input, indexes) self._indexes = indexes self.add_input("input", input) @@ -50,8 +50,8 @@ def __init__( self.add_output( "output", create_node_new_features_new_sampling( - features=self._output_feature_schemas, - indexes=self._output_indexes, + features=output_feature_schemas, + indexes=output_indexes, is_unix_timestamp=input.schema.is_unix_timestamp, creator=self, ), @@ -75,12 +75,12 @@ def _get_output_indexes( new_indexes: List[IndexSchema] = [] for index in indexes: - if index not in feature_dict: - raise ValueError(f"{index} is not a feature in input.") - if index in index_dict: raise ValueError(f"{index} is already an index in input.") + if index not in feature_dict: + raise ValueError(f"{index} is not a feature in input.") + new_indexes.append( IndexSchema(name=index, dtype=feature_dict[index]) ) @@ -101,14 +101,6 @@ def build_op_definition(cls) -> pb.OperatorDef: outputs=[pb.OperatorDef.Output(key="output")], ) - @property - def output_feature_schemas(self) -> List[FeatureSchema]: - return self._output_feature_schemas - - @property - def output_indexes(self) -> List[IndexSchema]: - return self._output_indexes - @property def indexes(self) -> List[str]: return self._indexes diff --git a/temporian/core/operators/test/test_add_index.py b/temporian/core/operators/test/test_add_index.py index 30f092417..3c946b50d 100644 --- a/temporian/core/operators/test/test_add_index.py +++ b/temporian/core/operators/test/test_add_index.py @@ -23,65 +23,65 @@ class AddIndexTest(TestCase): def setUp(self) -> None: self.timestamps = [0.4, 0.5, 0.6, 0.1, 0.2, 0.3, 0.4, 0.3] self.features = { - "state_id": ["A", "A", "A", "B", "B", "B", "B", "B"], - "store_id": [0, 0, 0, 0, 0, 1, 1, 1], - "item_id": [1, 1, 1, 2, 2, 2, 2, 3], - "sales": [10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0], + "a": ["A", "A", "A", "B", "B", "B", "B", "B"], + "b": [0, 0, 0, 0, 0, 1, 1, 1], + "c": [1, 1, 1, 2, 2, 2, 2, 3], + "d": [10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0], } self.evset = event_set( timestamps=self.timestamps, features=self.features, - indexes=["state_id"], + indexes=["a"], ) def test_add_index_single(self): - result = self.evset.add_index("store_id") + result = self.evset.add_index("b") expected = event_set( timestamps=self.timestamps, features=self.features, - indexes=["state_id", "store_id"], + indexes=["a", "b"], ) assertOperatorResult(self, result, expected, check_sampling=False) def test_add_index_multiple(self): - result = self.evset.add_index(["store_id", "item_id"]) + result = self.evset.add_index(["b", "c"]) expected = event_set( timestamps=self.timestamps, features=self.features, - indexes=["state_id", "store_id", "item_id"], + indexes=["a", "b", "c"], ) assertOperatorResult(self, result, expected, check_sampling=False) def test_set_index_single(self): - result = self.evset.set_index("store_id") + result = self.evset.set_index("b") expected = event_set( timestamps=self.timestamps, # Old index is now the last feature features={ - **{k: v for k, v in self.features.items() if k != "state_id"}, - "state_id": self.features["state_id"], + **{k: v for k, v in self.features.items() if k != "a"}, + "a": self.features["a"], }, - indexes=["store_id"], + indexes=["b"], ) assertOperatorResult(self, result, expected, check_sampling=False) def test_set_index_multiple(self): - result = self.evset.set_index(["store_id", "item_id"]) + result = self.evset.set_index(["b", "c"]) expected = event_set( timestamps=self.timestamps, # Old index is now the last feature features={ - **{k: v for k, v in self.features.items() if k != "state_id"}, - "state_id": self.features["state_id"], + **{k: v for k, v in self.features.items() if k != "a"}, + "a": self.features["a"], }, - indexes=["store_id", "item_id"], + indexes=["b", "c"], ) assertOperatorResult(self, result, expected, check_sampling=False) @@ -178,6 +178,26 @@ def test_mix(self): self.assertEqual(result.data[(2, b"X")].timestamps.tolist(), [4, 5]) self.assertEqual(result.data[(3, b"Z")].timestamps.tolist(), [6]) + def test_target_doesnt_exist(self): + with self.assertRaisesRegex(ValueError, "is not a feature in input"): + self.evset.add_index("e") + + def test_target_is_index(self): + with self.assertRaisesRegex(ValueError, "is already an index in input"): + self.evset.add_index("a") + + def test_empty_list(self): + with self.assertRaisesRegex( + ValueError, "Cannot specify empty list as `indexes` argument" + ): + self.evset.add_index([]) + + def test_empty_list_set(self): + with self.assertRaisesRegex( + ValueError, "Cannot specify empty list as `indexes` argument" + ): + self.evset.set_index([]) + if __name__ == "__main__": absltest.main() From 91ac3af56876d1badb9682800e6c461780e44e2f Mon Sep 17 00:00:00 2001 From: Ian Spektor Date: Tue, 31 Oct 2023 19:03:55 +0100 Subject: [PATCH 9/9] add missing tests for drop_index --- temporian/core/operators/drop_index.py | 20 +----- .../core/operators/test/test_drop_index.py | 62 +++++++++++-------- 2 files changed, 39 insertions(+), 43 deletions(-) diff --git a/temporian/core/operators/drop_index.py b/temporian/core/operators/drop_index.py index 14721e7db..97d162618 100644 --- a/temporian/core/operators/drop_index.py +++ b/temporian/core/operators/drop_index.py @@ -50,7 +50,7 @@ def __init__( input, indexes, keep ) - self._output_indexes = [ + output_indexes = [ index_level for index_level in input.schema.indexes if index_level.name not in indexes @@ -60,7 +60,7 @@ def __init__( "output", create_node_new_features_new_sampling( features=self._output_feature_schemas, - indexes=self._output_indexes, + indexes=output_indexes, is_unix_timestamp=input.schema.is_unix_timestamp, creator=self, ), @@ -74,19 +74,9 @@ def _get_output_feature_schemas( return input.schema.features index_dict = input.schema.index_name_to_dtype() - feature_dict = input.schema.feature_name_to_dtype() new_features: List[FeatureSchema] = [] for index in indexes: - if index not in index_dict: - raise ValueError(f"{index} is not an index in input.") - - if index in feature_dict: - raise ValueError( - f"{index} already exists in input's features. If you" - " want to drop the index, specify `keep=False`." - ) - new_features.append( FeatureSchema(name=index, dtype=index_dict[index]) ) @@ -98,10 +88,6 @@ def _get_output_feature_schemas( def output_feature_schemas(self) -> List[FeatureSchema]: return self._output_feature_schemas - @property - def output_indexes(self) -> List[IndexSchema]: - return self._output_indexes - @property def indexes(self) -> List[str]: return self._indexes @@ -150,7 +136,7 @@ def _normalize_indexes( index_dict = input.schema.index_name_to_dtype() for index in indexes: if index not in index_dict: - raise KeyError( + raise ValueError( f"{index} is not an index in {input.schema.indexes}." ) diff --git a/temporian/core/operators/test/test_drop_index.py b/temporian/core/operators/test/test_drop_index.py index f89d6d50f..c0436d202 100644 --- a/temporian/core/operators/test/test_drop_index.py +++ b/temporian/core/operators/test/test_drop_index.py @@ -23,15 +23,15 @@ class DropIndexTest(TestCase): def setUp(self) -> None: self.timestamps = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] self.features = { - "state_id": ["A", "A", "A", "B", "B", "B", "B", "B"], - "store_id": [0, 0, 0, 0, 0, 1, 1, 1], - "item_id": [1, 1, 1, 2, 2, 2, 2, 3], - "sales": [10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0], + "a": ["A", "A", "A", "B", "B", "B", "B", "B"], + "b": [0, 0, 0, 0, 0, 1, 1, 1], + "c": [1, 1, 1, 2, 2, 2, 2, 3], + "d": [10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0], } self.evset = event_set( timestamps=self.timestamps, features=self.features, - indexes=["store_id", "item_id"], + indexes=["b", "c"], ) def test_drop_all(self) -> None: @@ -39,10 +39,10 @@ def test_drop_all(self) -> None: timestamps=self.timestamps, # Old indexes are now the last features features={ - "state_id": self.features["state_id"], - "sales": self.features["sales"], - "store_id": self.features["store_id"], - "item_id": self.features["item_id"], + "a": self.features["a"], + "d": self.features["d"], + "b": self.features["b"], + "c": self.features["c"], }, ) @@ -55,15 +55,15 @@ def test_drop_single_first(self) -> None: timestamps=self.timestamps, # Old indexes are now the last features features={ - "item_id": self.features["item_id"], - "state_id": self.features["state_id"], - "sales": self.features["sales"], - "store_id": self.features["store_id"], + "c": self.features["c"], + "a": self.features["a"], + "d": self.features["d"], + "b": self.features["b"], }, - indexes=["item_id"], + indexes=["c"], ) - result = self.evset.drop_index("store_id") + result = self.evset.drop_index("b") assertOperatorResult(self, result, expected, check_sampling=False) @@ -72,15 +72,15 @@ def test_drop_single_second(self) -> None: timestamps=self.timestamps, # Old indexes are now the last features features={ - "store_id": self.features["store_id"], - "state_id": self.features["state_id"], - "sales": self.features["sales"], - "item_id": self.features["item_id"], + "b": self.features["b"], + "a": self.features["a"], + "d": self.features["d"], + "c": self.features["c"], }, - indexes=["store_id"], + indexes=["b"], ) - result = self.evset.drop_index("item_id") + result = self.evset.drop_index("c") assertOperatorResult(self, result, expected, check_sampling=False) @@ -89,14 +89,14 @@ def test_drop_single_keep_false(self) -> None: timestamps=self.timestamps, # Old indexes are now the last features features={ - "store_id": self.features["store_id"], - "state_id": self.features["state_id"], - "sales": self.features["sales"], + "b": self.features["b"], + "a": self.features["a"], + "d": self.features["d"], }, - indexes=["store_id"], + indexes=["b"], ) - result = self.evset.drop_index("item_id", keep=False) + result = self.evset.drop_index("c", keep=False) assertOperatorResult(self, result, expected, check_sampling=False) @@ -122,6 +122,16 @@ def test_str_index(self) -> None: assertOperatorResult(self, result, expected, check_sampling=False) + def test_wrong_index(self): + with self.assertRaisesRegex(ValueError, "x is not an index in"): + self.evset.drop_index("x") + + def test_empty_list(self): + with self.assertRaisesRegex( + ValueError, "Cannot specify empty list as `indexes` argument" + ): + self.evset.drop_index([]) + if __name__ == "__main__": absltest.main()