From f7f9322798bd583fb0308d25ef21022ab69bf525 Mon Sep 17 00:00:00 2001 From: rwedge Date: Wed, 4 Oct 2023 14:36:13 -0400 Subject: [PATCH 1/4] cap numpy version below 2 --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 8b15ca8d5..55e8d4b28 100644 --- a/setup.py +++ b/setup.py @@ -18,8 +18,8 @@ 'cloudpickle>=2.1.0,<3.0', 'Faker>=10,<15', 'graphviz>=0.13.2,<1', - "numpy>=1.20.0,<1.25.0;python_version<'3.10'", - "numpy>=1.23.3,<1.25.0;python_version>='3.10'", + "numpy>=1.20.0,<2;python_version<'3.10'", + "numpy>=1.23.3,<2;python_version>='3.10'", "pandas>=1.1.3;python_version<'3.10'", "pandas>=1.3.4;python_version>='3.10' and python_version<'3.11'", "pandas>=1.5.0;python_version>='3.11'", From 9d95ed8c2fe902b901858c627c719b8059576624 Mon Sep 17 00:00:00 2001 From: rwedge Date: Fri, 6 Oct 2023 18:19:11 -0400 Subject: [PATCH 2/4] update failing copulas test to use truncnorm --- tests/integration/single_table/test_copulas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/single_table/test_copulas.py b/tests/integration/single_table/test_copulas.py index 0e9547365..df996612b 100644 --- a/tests/integration/single_table/test_copulas.py +++ b/tests/integration/single_table/test_copulas.py @@ -345,7 +345,7 @@ def test_numerical_columns_gets_pii(): 'numerical': {'sdtype': 'numerical'} } }) - synth = GaussianCopulaSynthesizer(metadata) + synth = GaussianCopulaSynthesizer(metadata, default_distribution='truncnorm') synth.fit(data) # Run @@ -366,7 +366,7 @@ def test_numerical_columns_gets_pii(): 8: 'Davidland', 9: 'Port Christopher' }, - 'numerical': {0: 21, 1: 24, 2: 22, 3: 23, 4: 22, 5: 24, 6: 23, 7: 23, 8: 24, 9: 23} + 'numerical': {0: 22, 1: 24, 2: 22, 3: 23, 4: 22, 5: 24, 6: 23, 7: 24, 8: 24, 9: 24} }) pd.testing.assert_frame_equal(expected_sampled, sampled) From 512208db59a5e99418591b8894aa47c2d6bfe8cc Mon Sep 17 00:00:00 2001 From: rwedge Date: Fri, 6 Oct 2023 18:14:10 -0400 Subject: [PATCH 3/4] update failing constraints test to use truncnorm --- tests/integration/single_table/test_constraints.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/integration/single_table/test_constraints.py b/tests/integration/single_table/test_constraints.py index 1ace72f63..f2e7c98f8 100644 --- a/tests/integration/single_table/test_constraints.py +++ b/tests/integration/single_table/test_constraints.py @@ -534,7 +534,7 @@ def test_scalar_range_constraint_with_datetimes_and_nones(): }) metadata.validate() - synth = GaussianCopulaSynthesizer(metadata) + synth = GaussianCopulaSynthesizer(metadata, default_distribution='truncnorm') synth.add_constraints([ { 'constraint_class': 'ScalarRange', @@ -555,16 +555,16 @@ def test_scalar_range_constraint_with_datetimes_and_nones(): # Assert expected_sampled = pd.DataFrame({ 'A': { - 0: '2020-03-03', + 0: '2020-02-04', 1: np.nan, - 2: '2020-03-03', + 2: '2020-02-07', 3: np.nan, 4: np.nan, - 5: '2020-03-03', + 5: '2020-02-29', 6: np.nan, 7: np.nan, 8: np.nan, - 9: '2020-02-27', + 9: '2020-02-02', }, 'B': { 0: np.nan, @@ -572,9 +572,9 @@ def test_scalar_range_constraint_with_datetimes_and_nones(): 2: np.nan, 3: np.nan, 4: np.nan, - 5: '2021-04-14', + 5: '2021-11-22', 6: np.nan, - 7: '2021-05-21', + 7: '2021-06-19', 8: np.nan, 9: np.nan, } From b95609b51621d58958d58b9cf6235c1f892b8ea5 Mon Sep 17 00:00:00 2001 From: rwedge Date: Fri, 6 Oct 2023 18:38:48 -0400 Subject: [PATCH 4/4] update failing evaluation test to use truncnorm --- tests/integration/evaluation/test_single_table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/evaluation/test_single_table.py b/tests/integration/evaluation/test_single_table.py index 46c1ce561..244d402e7 100644 --- a/tests/integration/evaluation/test_single_table.py +++ b/tests/integration/evaluation/test_single_table.py @@ -12,13 +12,13 @@ def test_evaluation(): data = pd.DataFrame({'col': [1, 2, 3]}) metadata = SingleTableMetadata() metadata.add_column('col', sdtype='numerical') - synthesizer = GaussianCopulaSynthesizer(metadata) + synthesizer = GaussianCopulaSynthesizer(metadata, default_distribution='truncnorm') # Run and Assert synthesizer.fit(data) samples = synthesizer.sample(10) score = evaluate_quality(data, samples, metadata).get_score() - assert score == 0.8333333333333334 + assert score == 0.8666666666666667 diagnostic = run_diagnostic(data, samples, metadata).get_results() assert diagnostic == {