diff --git a/.benchmarks/Darwin-CPython-3.11-64bit/0001_e8b0acf18413d142a4778eb63f40c518f32b94bc_20241120_094745_uncommited-changes.json b/.benchmarks/Darwin-CPython-3.11-64bit/0001_e8b0acf18413d142a4778eb63f40c518f32b94bc_20241120_094745_uncommited-changes.json new file mode 100644 index 000000000..d96f71f7c --- /dev/null +++ b/.benchmarks/Darwin-CPython-3.11-64bit/0001_e8b0acf18413d142a4778eb63f40c518f32b94bc_20241120_094745_uncommited-changes.json @@ -0,0 +1,288 @@ +{ + "machine_info": { + "node": "diotimac2.local", + "processor": "arm", + "machine": "arm64", + "python_compiler": "Clang 18.1.8 ", + "python_implementation": "CPython", + "python_implementation_version": "3.11.10", + "python_version": "3.11.10", + "python_build": [ + "main", + "Oct 1 2024 00:26:49" + ], + "release": "23.4.0", + "system": "Darwin", + "cpu": { + "python_version": "3.11.10.final.0 (64 bit)", + "cpuinfo_version": [ + 9, + 0, + 0 + ], + "cpuinfo_version_string": "9.0.0", + "arch": "ARM_8", + "bits": 64, + "count": 8, + "arch_string_raw": "arm64", + "brand_raw": "Apple M1" + } + }, + "commit_info": { + "id": "e8b0acf18413d142a4778eb63f40c518f32b94bc", + "time": "2024-11-20T10:36:21+01:00", + "author_time": "2024-11-20T10:27:23+01:00", + "dirty": true, + "project": "openfisca-core", + "branch": "perf/add-benchmark-to-perf-test" + }, + "benchmarks": [ + { + "group": "Enum.__eq__", + "name": "test_benchmark_enum_eq", + "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_eq", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 6.116424999891024e-06, + "max": 6.239233299857005e-06, + "mean": 6.1712051209888155e-06, + "stddev": 2.3556546098919103e-08, + "rounds": 100, + "median": 6.170224949983095e-06, + "iqr": 2.8364549962134198e-08, + "q1": 6.158277100075793e-06, + "q3": 6.186641650037927e-06, + "iqr_outliers": 2, + "stddev_outliers": 30, + "outliers": "30;2", + "ld15iqr": 6.116424999891024e-06, + "hd15iqr": 6.2380624998695565e-06, + "ops": 162042.90416452225, + "total": 0.0006171205120988818, + "iterations": 10000 + } + }, + { + "group": "Enum.encode (int)", + "name": "test_benchmark_enum_encode_int", + "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_encode_int", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 6.000000212225132e-07, + "max": 4.333400102041196e-06, + "mean": 6.603360034205253e-07, + "stddev": 3.7118752804099704e-07, + "rounds": 100, + "median": 6.209000275703147e-07, + "iqr": 1.2449982023099353e-08, + "q1": 6.166999810375274e-07, + "q3": 6.291499630606267e-07, + "iqr_outliers": 3, + "stddev_outliers": 1, + "outliers": "1;3", + "ld15iqr": 6.000000212225132e-07, + "hd15iqr": 6.584001312148757e-07, + "ops": 1514380.5499321925, + "total": 6.603360034205245e-05, + "iterations": 10 + } + }, + { + "group": "Enum.encode (str)", + "name": "test_benchmark_enum_encode_str", + "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_encode_str", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0011514084000737058, + "max": 0.005457095800011302, + "mean": 0.0013338218729913934, + "stddev": 0.0005570327348628046, + "rounds": 100, + "median": 0.0011803895499724604, + "iqr": 3.827289992841543e-05, + "q1": 0.0011704437500156927, + "q3": 0.0012087166499441082, + "iqr_outliers": 16, + "stddev_outliers": 6, + "outliers": "6;16", + "ld15iqr": 0.0011514084000737058, + "hd15iqr": 0.0013195624998843414, + "ops": 749.7252970947887, + "total": 0.13338218729913934, + "iterations": 10 + } + }, + { + "group": "Enum.encode (Enum)", + "name": "test_benchmark_enum_encode_enum", + "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_encode_enum", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0019466166999336566, + "max": 0.0024945540999397053, + "mean": 0.0020038005400019755, + "stddev": 8.117530790455684e-05, + "rounds": 100, + "median": 0.001972604149977997, + "iqr": 5.918540000493565e-05, + "q1": 0.0019553521000489123, + "q3": 0.002014537500053848, + "iqr_outliers": 11, + "stddev_outliers": 15, + "outliers": "15;11", + "ld15iqr": 0.0019466166999336566, + "hd15iqr": 0.002103391700075008, + "ops": 499.05166708808946, + "total": 0.2003800540001976, + "iterations": 10 + } + }, + { + "group": "EnumArray.__eq__", + "name": "test_benchmark_enum_array_eq", + "fullname": "openfisca_core/indexed_enums/tests/test_enum_array.py::test_benchmark_enum_array_eq", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 8.86542000444024e-06, + "max": 9.384160002809948e-06, + "mean": 8.935446600662544e-06, + "stddev": 1.0087850596955905e-07, + "rounds": 100, + "median": 8.896874996935366e-06, + "iqr": 4.874999831372528e-08, + "q1": 8.881044996087438e-06, + "q3": 8.929794994401163e-06, + "iqr_outliers": 15, + "stddev_outliers": 10, + "outliers": "10;15", + "ld15iqr": 8.86542000444024e-06, + "hd15iqr": 9.002920014609117e-06, + "ops": 111913.82419831734, + "total": 0.0008935446600662543, + "iterations": 100 + } + }, + { + "group": "EnumArray.decode", + "name": "test_benchmark_enum_array_decode", + "fullname": "openfisca_core/indexed_enums/tests/test_enum_array.py::test_benchmark_enum_array_decode", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0007523354099976131, + "max": 0.0015013620799982164, + "mean": 0.0007657737745999838, + "stddev": 7.457900895775381e-05, + "rounds": 100, + "median": 0.0007576514599986695, + "iqr": 4.303959995013377e-06, + "q1": 0.0007556031250078377, + "q3": 0.0007599070850028511, + "iqr_outliers": 3, + "stddev_outliers": 1, + "outliers": "1;3", + "ld15iqr": 0.0007523354099976131, + "hd15iqr": 0.000770621250012482, + "ops": 1305.8686953890117, + "total": 0.07657737745999839, + "iterations": 100 + } + }, + { + "group": "EnumArray.decode_to_str", + "name": "test_benchmark_enum_array_decode_to_str", + "fullname": "openfisca_core/indexed_enums/tests/test_enum_array.py::test_benchmark_enum_array_decode_to_str", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0008616308300042875, + "max": 0.0009163904200067918, + "mean": 0.0008716657909002606, + "stddev": 9.943622783295886e-06, + "rounds": 100, + "median": 0.0008683410449975782, + "iqr": 6.7525050053518325e-06, + "q1": 0.00086608999499731, + "q3": 0.0008728425000026619, + "iqr_outliers": 11, + "stddev_outliers": 12, + "outliers": "12;11", + "ld15iqr": 0.0008616308300042875, + "hd15iqr": 0.0008845904199915822, + "ops": 1147.2286860852892, + "total": 0.08716657909002602, + "iterations": 100 + } + } + ], + "datetime": "2024-11-20T09:48:12.053939+00:00", + "version": "5.1.0" +} \ No newline at end of file diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py index 8083a6d49..9b95364a2 100644 --- a/openfisca_core/indexed_enums/_enum_type.py +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -66,5 +66,14 @@ def __new__( def __dir__(cls) -> list[str]: return sorted({"indices", "names", "enums", *super().__dir__()}) + def __hash__(cls) -> int: + return object.__hash__(cls.__name__) + + def __eq__(cls, other: object) -> bool: + return hash(cls) == hash(other) + + def __ne__(cls, other: object) -> bool: + return hash(cls) != hash(other) + __all__ = ["EnumType"] diff --git a/openfisca_core/indexed_enums/_utils.py b/openfisca_core/indexed_enums/_utils.py index aa676b92f..67c9e741b 100644 --- a/openfisca_core/indexed_enums/_utils.py +++ b/openfisca_core/indexed_enums/_utils.py @@ -92,8 +92,7 @@ def _int_to_index( ... ) >>> _int_to_index(Road, 1) - Traceback (most recent call last): - TypeError: 'int' object is not iterable + array([1], dtype=uint8) >>> _int_to_index(Road, [1]) array([1], dtype=uint8) @@ -105,8 +104,7 @@ def _int_to_index( array([1], dtype=uint8) >>> _int_to_index(Road, numpy.array(1)) - Traceback (most recent call last): - TypeError: iteration over a 0-d array + array([1], dtype=uint8) >>> _int_to_index(Road, numpy.array([1])) array([1], dtype=uint8) @@ -118,9 +116,9 @@ def _int_to_index( array([1, 1], dtype=uint8) """ - return numpy.array( - [index for index in value if index < len(enum_class.__members__)], t.EnumDType - ) + indices = enum_class.indices + values = numpy.array(value, copy=False) + return values[values < indices.size].astype(t.EnumDType) def _str_to_index( @@ -155,14 +153,13 @@ def _str_to_index( ... ) >>> _str_to_index(Road, "AVENUE") - array([], dtype=uint8) + array([1], dtype=uint8) >>> _str_to_index(Road, ["AVENUE"]) array([1], dtype=uint8) >>> _str_to_index(Road, numpy.array("AVENUE")) - Traceback (most recent call last): - TypeError: iteration over a 0-d array + array([1], dtype=uint8) >>> _str_to_index(Road, numpy.array(["AVENUE"])) array([1], dtype=uint8) @@ -174,14 +171,12 @@ def _str_to_index( array([1, 1], dtype=uint8) """ - return numpy.array( - [ - enum_class.__members__[name].index - for name in value - if name in enum_class._member_names_ - ], - t.EnumDType, - ) + values = numpy.array(value, copy=False) + names = enum_class.names + mask = numpy.isin(values, names) + sorter = numpy.argsort(names) + result = sorter[numpy.searchsorted(names, values[mask], sorter=sorter)] + return result.astype(t.EnumDType) __all__ = ["_enum_to_index", "_int_to_index", "_str_to_index"] diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index a733fd5da..43a893e85 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -117,13 +117,6 @@ def __init__(self, *__args: object, **__kwargs: object) -> None: """ self.index = len(self._member_names_) - # Bypass the slow Enum.__eq__ - __eq__ = object.__eq__ - - # In Python 3, __hash__ must be defined if __eq__ is defined to stay - # hashable. - __hash__ = object.__hash__ - def __repr__(self) -> str: return f"{self.__class__.__name__}.{self.name}" @@ -199,7 +192,7 @@ def _encode_array(cls, value: t.VarArray) -> t.EnumArray: indices = _int_to_index(cls, value) elif _is_str_array(value): # type: ignore[unreachable] indices = _str_to_index(cls, value) - elif _is_enum_array(value) and cls.__name__ is value[0].__class__.__name__: + elif _is_enum_array(value) and cls == value[0].__class__: indices = _enum_to_index(value) else: raise EnumEncodingError(cls, value) diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index 98f9b4c6a..65bc209a7 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -153,10 +153,7 @@ def __eq__(self, other: object) -> t.BoolArray: # type: ignore[override] return NotImplemented if other is None: return NotImplemented - if ( - isinstance(other, type(t.Enum)) - and other.__name__ is self.possible_values.__name__ - ): + if isinstance(other, type(t.Enum)) and other == self.possible_values: result = ( self.view(numpy.ndarray) == self.possible_values.indices[ @@ -164,10 +161,7 @@ def __eq__(self, other: object) -> t.BoolArray: # type: ignore[override] ] ) return result - if ( - isinstance(other, t.Enum) - and other.__class__.__name__ is self.possible_values.__name__ - ): + if isinstance(other, t.Enum) and other.__class__ == self.possible_values: result = self.view(numpy.ndarray) == other.index return result # For NumPy >=1.26.x. diff --git a/openfisca_core/indexed_enums/tests/test_enum.py b/openfisca_core/indexed_enums/tests/test_enum.py index 2e49c1742..c6708a05c 100644 --- a/openfisca_core/indexed_enums/tests/test_enum.py +++ b/openfisca_core/indexed_enums/tests/test_enum.py @@ -133,3 +133,52 @@ def test_enum_encode_with_any_sequence(): sequence = memoryview(b"DOG") with pytest.raises(IndexError): Animal.encode(sequence) + + +# Benchmarking + + +@pytest.mark.benchmark(group="Enum.__eq__") +def test_benchmark_enum_eq(benchmark): + """Benchmark the `__eq__` method.""" + array = numpy.random.choice([*list(Animal), *list(Colour)], size=50000) + + def test(): + animal_1, animal_2 = numpy.random.choice(array, size=2) + animal_1 == animal_2 + animal_1 != animal_2 + + benchmark.pedantic(test, iterations=10000, rounds=100) + + +@pytest.mark.benchmark(group="Enum.encode (int)") +def test_benchmark_enum_encode_int(benchmark): + """Benchmark the `Enum.encode` method.""" + array = numpy.random.choice([0, 1, 2], size=50000) + + def test(): + Colour.encode(array) + + benchmark.pedantic(test, iterations=10, rounds=100) + + +@pytest.mark.benchmark(group="Enum.encode (str)") +def test_benchmark_enum_encode_str(benchmark): + """Benchmark the `Enum.encode` method.""" + array = numpy.random.choice(["INCARNADINE", "TURQUOISE", "AMARANTH"], size=50000) + + def test(): + Colour.encode(array) + + benchmark.pedantic(test, iterations=10, rounds=100) + + +@pytest.mark.benchmark(group="Enum.encode (Enum)") +def test_benchmark_enum_encode_enum(benchmark): + """Benchmark the `Enum.encode` method.""" + array = numpy.random.choice(list(Colour), size=50000) + + def test(): + Colour.encode(array) + + benchmark.pedantic(test, iterations=10, rounds=100) diff --git a/openfisca_core/indexed_enums/tests/test_enum_array.py b/openfisca_core/indexed_enums/tests/test_enum_array.py index 1ab247468..854c65ddf 100644 --- a/openfisca_core/indexed_enums/tests/test_enum_array.py +++ b/openfisca_core/indexed_enums/tests/test_enum_array.py @@ -28,3 +28,45 @@ def test_enum_array_any_other_operation(enum_array): """Only equality and non-equality operations are permitted.""" with pytest.raises(TypeError, match="Forbidden operation."): enum_array * 1 + + +# Benchmarking + + +@pytest.mark.benchmark(group="EnumArray.__eq__") +def test_benchmark_enum_array_eq(benchmark): + """Benchmark the `EnumArray.__eq__` method.""" + array_1 = numpy.random.choice(list(Fruit), size=50000) + array_2 = numpy.random.choice(list(Fruit), size=50000) + enum_array_1 = Fruit.encode(array_1) + enum_array_2 = Fruit.encode(array_2) + + def test(): + enum_array_1 == enum_array_2 + enum_array_1 != enum_array_2 + + benchmark.pedantic(test, iterations=100, rounds=100) + + +@pytest.mark.benchmark(group="EnumArray.decode") +def test_benchmark_enum_array_decode(benchmark): + """Benchmark the `EnumArray.decode` method.""" + array = numpy.random.choice(list(Fruit), size=50000) + enum_array = Fruit.encode(array) + + def test(): + enum_array.decode() + + benchmark.pedantic(test, iterations=100, rounds=100) + + +@pytest.mark.benchmark(group="EnumArray.decode_to_str") +def test_benchmark_enum_array_decode_to_str(benchmark): + """Benchmark the `EnumArray.decode_to_str` method.""" + array = numpy.random.choice(list(Fruit), size=50000) + enum_array = Fruit.encode(array) + + def test(): + enum_array.decode_to_str() + + benchmark.pedantic(test, iterations=100, rounds=100) diff --git a/setup.cfg b/setup.cfg index e6b37ba7e..32a3f962a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -72,7 +72,11 @@ py_version = 39 sections = FUTURE, TYPING, TYPES, STDLIB, THIRDPARTY, OPENFISCA, FIRSTPARTY, LOCALFOLDER [tool:pytest] -addopts = --disable-pytest-warnings --doctest-modules --showlocals +addopts = + --benchmark-skip + --disable-pytest-warnings + --doctest-modules + --showlocals doctest_optionflags = ELLIPSIS IGNORE_EXCEPTION_DETAIL NUMBER NORMALIZE_WHITESPACE python_files = **/*.py testpaths = tests diff --git a/setup.py b/setup.py index b466b2407..491c63a2a 100644 --- a/setup.py +++ b/setup.py @@ -63,6 +63,7 @@ "pylint >=3.3.1, <4.0", "pylint-per-file-ignores >=1.3.2, <2.0", "pyright >=1.1.382, <2.0", + "pytest-benchmark >=5.1.0, <6.0", "ruff >=0.6.9, <1.0", "ruff-lsp >=0.0.57, <1.0", *api_requirements, diff --git a/tests/core/test_big_dataset.py b/tests/core/test_big_dataset.py index 677b483a6..020894438 100644 --- a/tests/core/test_big_dataset.py +++ b/tests/core/test_big_dataset.py @@ -1,6 +1,5 @@ from collections.abc import Iterable -from time import time from unittest import TestCase from openfisca_core.simulations import SimulationBuilder @@ -55,14 +54,9 @@ def run_simulation(tax_benefit_system) -> None: tc.assertAlmostEqual(total_taxes, sum(persons_salaries) * 0.17833333, delta=1) -def test_speed(tax_benefit_system): - elapsed = 0 - for _ in range(10): - start = time() +def test_speed(tax_benefit_system, benchmark) -> None: + def run() -> None: run_simulation(tax_benefit_system) - end = time() - elapsed += end - start - elapsed_mean = elapsed / 10 - # print(f"Mean elapsed time: {elapsed_mean:.2f} seconds") - # Expected time is less than 0.3 seconds on a AMD Threadripper 1950X - tc.assertLess(elapsed_mean, 0.3) + + result = benchmark.pedantic(run, iterations=1, rounds=10) + assert not result