From 172e71256b76d550470d26d5a9cf1a89e7eb8e21 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev <41479552+pvk-developer@users.noreply.github.com> Date: Thu, 9 May 2024 00:04:41 +0200 Subject: [PATCH] Issue 1995 update code to remove futurewarning related to enforce uniqueness (#1997) --- sdv/data_processing/data_processor.py | 26 +++++---- .../data_processing/test_data_processor.py | 56 +++++++------------ 2 files changed, 35 insertions(+), 47 deletions(-) diff --git a/sdv/data_processing/data_processor.py b/sdv/data_processing/data_processor.py index 5c1bff886..638a922ce 100644 --- a/sdv/data_processing/data_processor.py +++ b/sdv/data_processing/data_processor.py @@ -412,7 +412,7 @@ def _update_transformers_by_sdtypes(self, sdtype, transformer): self._transformers_by_sdtype[sdtype] = transformer @staticmethod - def create_anonymized_transformer(sdtype, column_metadata, enforce_uniqueness, + def create_anonymized_transformer(sdtype, column_metadata, cardinality_rule, locales=['en_US']): """Create an instance of an ``AnonymizedFaker``. @@ -424,9 +424,11 @@ def create_anonymized_transformer(sdtype, column_metadata, enforce_uniqueness, Sematic data type or a ``Faker`` function name. column_metadata (dict): A dictionary representing the rest of the metadata for the given ``sdtype``. - enforce_uniqueness (bool): - If ``True`` overwrite ``enforce_uniqueness`` with ``True`` to ensure unique - generation for primary keys. + cardinality_rule (str): + If ``'unique'`` enforce that every created value is unique. + If ``'match'`` match the cardinality of the data seen during fit. + If ``None`` do not consider cardinality. + Defaults to ``None``. locales (str or list): Locale or list of locales to use for the AnonymizedFaker transfomer. Defaults to ['en_US']. @@ -434,14 +436,14 @@ def create_anonymized_transformer(sdtype, column_metadata, enforce_uniqueness, Returns: Instance of ``rdt.transformers.pii.AnonymizedFaker``. """ - kwargs = {'locales': locales} + kwargs = { + 'locales': locales, + 'cardinality_rule': cardinality_rule + } for key, value in column_metadata.items(): if key not in ['pii', 'sdtype']: kwargs[key] = value - if enforce_uniqueness: - kwargs['enforce_uniqueness'] = True - try: transformer = get_anonymized_transformer(sdtype, kwargs) except AttributeError as error: @@ -494,7 +496,7 @@ def _get_transformer_instance(self, sdtype, column_metadata): is_baseprovider = transformer.provider_name == 'BaseProvider' if is_lexify and is_baseprovider: # Default settings return self.create_anonymized_transformer( - sdtype, column_metadata, False, self._locales + sdtype, column_metadata, None, self._locales ) kwargs = { @@ -598,11 +600,11 @@ def _create_config(self, data, columns_created_by_constraints): elif pii: sdtypes[column] = 'pii' - enforce_uniqueness = bool(column in self._keys) + cardinality_rule = 'unique' if bool(column in self._keys) else None transformers[column] = self.create_anonymized_transformer( sdtype, column_metadata, - enforce_uniqueness, + cardinality_rule, self._locales ) @@ -614,7 +616,7 @@ def _create_config(self, data, columns_created_by_constraints): transformers[column] = self.create_anonymized_transformer( sdtype=sdtype, column_metadata=column_metadata, - enforce_uniqueness=True, + cardinality_rule='unique', locales=self._locales ) diff --git a/tests/unit/data_processing/test_data_processor.py b/tests/unit/data_processing/test_data_processor.py index 50cd2751d..6fec93773 100644 --- a/tests/unit/data_processing/test_data_processor.py +++ b/tests/unit/data_processing/test_data_processor.py @@ -987,40 +987,23 @@ def test_create_regex_generator_regex_generator(self, mock_rdt): ) @patch('sdv.data_processing.data_processor.get_anonymized_transformer') - def test_create_anonymized_transformer_enforce_uniqueness(self, - mock_get_anonymized_transformer): - """Test the ``create_regex_generator`` method. - - Test that when given an ``sdtype`` and ``column_metadata`` that does not contain a - ``regex_format`` this calls ``create_anonymized_transformer`` with ``enforce_uniqueness`` - set to ``True``. - - Input: - - String representing an ``sdtype``. - - Dictionary with ``column_metadata`` that contains ``sdtype``. - - Mock: - - Mock the ``create_anonymized_transformer``. + def test_create_anonymized_transformer_cardinality_rule_unique( + self, mock_get_anonymized_transformer): + """Test the ``create_anonymized_transformer`` method. - Output: - - The return value of ``create_anonymized_transformer``. + Test that when calling with ``cardinality_rule`` set to ``'unique'``, this + calls ``get_anonymized_transformer`` with the given parameters. """ # Setup sdtype = 'ssn' - column_metadata = { - 'sdtype': 'ssn', - } + column_metadata = {'sdtype': 'ssn'} # Run - output = DataProcessor.create_anonymized_transformer( - sdtype, - column_metadata, - True - ) + output = DataProcessor.create_anonymized_transformer(sdtype, column_metadata, 'unique') # Assert mock_get_anonymized_transformer.assert_called_once_with( - 'ssn', {'enforce_uniqueness': True, 'locales': ['en_US']} + 'ssn', {'cardinality_rule': 'unique', 'locales': ['en_US']} ) assert output == mock_get_anonymized_transformer.return_value @@ -1033,21 +1016,19 @@ def test_create_anonymized_transformer_locales(self, mock_get_anonymized_transfo """ # Setup sdtype = 'ssn' - column_metadata = { - 'sdtype': 'ssn', - } + column_metadata = {'sdtype': 'ssn'} # Run output = DataProcessor.create_anonymized_transformer( sdtype, column_metadata, - False, + None, locales=['en_US', 'en_CA'] ) # Assert mock_get_anonymized_transformer.assert_called_once_with( - 'ssn', {'locales': ['en_US', 'en_CA']} + 'ssn', {'locales': ['en_US', 'en_CA'], 'cardinality_rule': None} ) assert output == mock_get_anonymized_transformer.return_value @@ -1069,7 +1050,7 @@ def test_create_anonymized_transformer_locales_missing_attribute(self): DataProcessor.create_anonymized_transformer( sdtype, column_metadata, - False, + None, locales=['en_UK'] ) @@ -1099,13 +1080,18 @@ def test_create_anonymized_transformer(self, mock_get_anonymized_transformer): } # Run - output = DataProcessor.create_anonymized_transformer(sdtype, column_metadata, False) + output = DataProcessor.create_anonymized_transformer(sdtype, column_metadata, 'unique') # Assert assert output == mock_get_anonymized_transformer.return_value - mock_get_anonymized_transformer.assert_called_once_with( - 'email', {'function_kwargs': {'domain': 'gmail.com'}, 'locales': ['en_US']} - ) + expected_kwargs = { + 'function_kwargs': { + 'domain': 'gmail.com' + }, + 'locales': ['en_US'], + 'cardinality_rule': 'unique' + } + mock_get_anonymized_transformer.assert_called_once_with('email', expected_kwargs) def test__get_transformer_instance_no_kwargs(self): """Test the ``_get_transformer_instance`` without keyword args.