Skip to content

Commit

Permalink
Issue 1995: update code to remove FutureWarning related to enforce_uniqueness (#1997)
Browse files Browse the repository at this point in the history
  • Loading branch information
pvk-developer authored May 8, 2024
1 parent e1f787e commit 172e712
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 47 deletions.
26 changes: 14 additions & 12 deletions sdv/data_processing/data_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ def _update_transformers_by_sdtypes(self, sdtype, transformer):
self._transformers_by_sdtype[sdtype] = transformer

@staticmethod
def create_anonymized_transformer(sdtype, column_metadata, enforce_uniqueness,
def create_anonymized_transformer(sdtype, column_metadata, cardinality_rule,
locales=['en_US']):
"""Create an instance of an ``AnonymizedFaker``.
Expand All @@ -424,24 +424,26 @@ def create_anonymized_transformer(sdtype, column_metadata, enforce_uniqueness,
Semantic data type or a ``Faker`` function name.
column_metadata (dict):
A dictionary representing the rest of the metadata for the given ``sdtype``.
enforce_uniqueness (bool):
If ``True`` overwrite ``enforce_uniqueness`` with ``True`` to ensure unique
generation for primary keys.
cardinality_rule (str):
If ``'unique'`` enforce that every created value is unique.
If ``'match'`` match the cardinality of the data seen during fit.
If ``None`` do not consider cardinality.
Defaults to ``None``.
locales (str or list):
Locale or list of locales to use for the AnonymizedFaker transformer.
Defaults to ['en_US'].
Returns:
Instance of ``rdt.transformers.pii.AnonymizedFaker``.
"""
kwargs = {'locales': locales}
kwargs = {
'locales': locales,
'cardinality_rule': cardinality_rule
}
for key, value in column_metadata.items():
if key not in ['pii', 'sdtype']:
kwargs[key] = value

if enforce_uniqueness:
kwargs['enforce_uniqueness'] = True

try:
transformer = get_anonymized_transformer(sdtype, kwargs)
except AttributeError as error:
Expand Down Expand Up @@ -494,7 +496,7 @@ def _get_transformer_instance(self, sdtype, column_metadata):
is_baseprovider = transformer.provider_name == 'BaseProvider'
if is_lexify and is_baseprovider: # Default settings
return self.create_anonymized_transformer(
sdtype, column_metadata, False, self._locales
sdtype, column_metadata, None, self._locales
)

kwargs = {
Expand Down Expand Up @@ -598,11 +600,11 @@ def _create_config(self, data, columns_created_by_constraints):

elif pii:
sdtypes[column] = 'pii'
enforce_uniqueness = bool(column in self._keys)
cardinality_rule = 'unique' if bool(column in self._keys) else None
transformers[column] = self.create_anonymized_transformer(
sdtype,
column_metadata,
enforce_uniqueness,
cardinality_rule,
self._locales
)

Expand All @@ -614,7 +616,7 @@ def _create_config(self, data, columns_created_by_constraints):
transformers[column] = self.create_anonymized_transformer(
sdtype=sdtype,
column_metadata=column_metadata,
enforce_uniqueness=True,
cardinality_rule='unique',
locales=self._locales
)

Expand Down
56 changes: 21 additions & 35 deletions tests/unit/data_processing/test_data_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -987,40 +987,23 @@ def test_create_regex_generator_regex_generator(self, mock_rdt):
)

@patch('sdv.data_processing.data_processor.get_anonymized_transformer')
def test_create_anonymized_transformer_enforce_uniqueness(self,
mock_get_anonymized_transformer):
"""Test the ``create_regex_generator`` method.
Test that when given an ``sdtype`` and ``column_metadata`` that does not contain a
``regex_format`` this calls ``create_anonymized_transformer`` with ``enforce_uniqueness``
set to ``True``.
Input:
- String representing an ``sdtype``.
- Dictionary with ``column_metadata`` that contains ``sdtype``.
Mock:
- Mock the ``create_anonymized_transformer``.
def test_create_anonymized_transformer_cardinality_rule_unique(
self, mock_get_anonymized_transformer):
"""Test the ``create_anonymized_transformer`` method.
Output:
- The return value of ``create_anonymized_transformer``.
Test that when calling with ``cardinality_rule`` set to ``'unique'``, this
calls ``get_anonymized_transformer`` with the given parameters.
"""
# Setup
sdtype = 'ssn'
column_metadata = {
'sdtype': 'ssn',
}
column_metadata = {'sdtype': 'ssn'}

# Run
output = DataProcessor.create_anonymized_transformer(
sdtype,
column_metadata,
True
)
output = DataProcessor.create_anonymized_transformer(sdtype, column_metadata, 'unique')

# Assert
mock_get_anonymized_transformer.assert_called_once_with(
'ssn', {'enforce_uniqueness': True, 'locales': ['en_US']}
'ssn', {'cardinality_rule': 'unique', 'locales': ['en_US']}
)
assert output == mock_get_anonymized_transformer.return_value

Expand All @@ -1033,21 +1016,19 @@ def test_create_anonymized_transformer_locales(self, mock_get_anonymized_transfo
"""
# Setup
sdtype = 'ssn'
column_metadata = {
'sdtype': 'ssn',
}
column_metadata = {'sdtype': 'ssn'}

# Run
output = DataProcessor.create_anonymized_transformer(
sdtype,
column_metadata,
False,
None,
locales=['en_US', 'en_CA']
)

# Assert
mock_get_anonymized_transformer.assert_called_once_with(
'ssn', {'locales': ['en_US', 'en_CA']}
'ssn', {'locales': ['en_US', 'en_CA'], 'cardinality_rule': None}
)
assert output == mock_get_anonymized_transformer.return_value

Expand All @@ -1069,7 +1050,7 @@ def test_create_anonymized_transformer_locales_missing_attribute(self):
DataProcessor.create_anonymized_transformer(
sdtype,
column_metadata,
False,
None,
locales=['en_UK']
)

Expand Down Expand Up @@ -1099,13 +1080,18 @@ def test_create_anonymized_transformer(self, mock_get_anonymized_transformer):
}

# Run
output = DataProcessor.create_anonymized_transformer(sdtype, column_metadata, False)
output = DataProcessor.create_anonymized_transformer(sdtype, column_metadata, 'unique')

# Assert
assert output == mock_get_anonymized_transformer.return_value
mock_get_anonymized_transformer.assert_called_once_with(
'email', {'function_kwargs': {'domain': 'gmail.com'}, 'locales': ['en_US']}
)
expected_kwargs = {
'function_kwargs': {
'domain': 'gmail.com'
},
'locales': ['en_US'],
'cardinality_rule': 'unique'
}
mock_get_anonymized_transformer.assert_called_once_with('email', expected_kwargs)

def test__get_transformer_instance_no_kwargs(self):
"""Test the ``_get_transformer_instance`` without keyword args.
Expand Down

0 comments on commit 172e712

Please sign in to comment.