Skip to content

Commit

Permalink
fix: faster rounding test in weekly (#957)
Browse files (browse the repository at this point in the history)
Co-authored-by: jfrery <[email protected]>
  • Loading branch information
andrei-stoian-zama and jfrery authored Dec 11, 2024
1 parent b04284a commit 5b9466c
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 26 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/continuous-integration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -980,10 +980,12 @@ jobs:
run: |
./script/make_utils/check_installation_with_all_python.sh --version ${{ matrix.python_version }} --sync_env
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4679
# Check installation with pip
- name: Check installation with pip and python ${{ matrix.python_version }} (weekly)
if: |
(fromJSON(env.IS_WEEKLY))
&& matrix.python_version != '3.12'
&& steps.conformance.outcome == 'success'
&& !cancelled()
run: |
Expand Down
2 changes: 1 addition & 1 deletion docs/advanced_examples/DecisionTreeClassifier.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
"\n",
"# List of hyper parameters to tune\n",
"param_grid = {\n",
" \"max_features\": [None, \"auto\", \"sqrt\", \"log2\"],\n",
" \"max_features\": [None, \"sqrt\", \"log2\"],\n",
" \"min_samples_leaf\": [1, 10, 100],\n",
" \"min_samples_split\": [2, 10, 100],\n",
" \"max_depth\": [None, 2, 4, 6, 8],\n",
Expand Down
31 changes: 24 additions & 7 deletions docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -130,28 +130,45 @@
" y (np.array): Target labels of the dataset.\n",
" \"\"\"\n",
" if data_id is not None:\n",
" X, y = fetch_openml(data_id=data_id, as_frame=False, cache=True, return_X_y=True)\n",
" X, y = fetch_openml(data_id=data_id, as_frame=True, cache=True, return_X_y=True)\n",
" else:\n",
" X, y = fetch_openml(name=name, as_frame=False, cache=True, return_X_y=True)\n",
" X, y = fetch_openml(name=name, as_frame=True, cache=True, return_X_y=True)\n",
" return X, y\n",
"\n",
"\n",
"def preprocess_features(X):\n",
" \"\"\"Convert categorical columns to numerical.\"\"\"\n",
" X_processed = X.copy()\n",
"\n",
" for column in X_processed.columns:\n",
" if X_processed[column].dtype == \"object\" or X_processed[column].dtype.name == \"category\":\n",
" # Convert categorical columns to numeric using label encoding\n",
" X_processed[column] = X_processed[column].astype(\"category\").cat.codes\n",
"\n",
" return X_processed.astype(np.float32)\n",
"\n",
"\n",
"for ds_name, ds_id in dataset_names.items():\n",
" print(f\"Loading {ds_name}\")\n",
"\n",
" X, y = load_dataset(ds_name, ds_id)\n",
"\n",
" # Preprocess features (handle categorical data)\n",
" X = preprocess_features(X)\n",
"\n",
" # Remove rows with NaN values\n",
" not_nan_idx = np.where(~np.isnan(X).any(axis=1))\n",
" X = X[not_nan_idx]\n",
" y = y[not_nan_idx]\n",
" not_nan_mask = ~np.isnan(X).any(axis=1)\n",
" X = X[not_nan_mask]\n",
" y = y[not_nan_mask]\n",
"\n",
" # Convert non-integer target labels to integers\n",
" if not y.dtype == np.int64:\n",
" encoder = OrdinalEncoder()\n",
" y = encoder.fit_transform(y.reshape(-1, 1)).astype(np.int32).squeeze()\n",
" # Convert pandas Series to numpy array before reshaping\n",
" y = encoder.fit_transform(np.array(y).reshape(-1, 1)).astype(np.int32).squeeze()\n",
"\n",
" datasets[ds_name] = {\"X\": X, \"y\": y}"
" # Ensure both X and y are numpy arrays before storing\n",
" datasets[ds_name] = {\"X\": np.array(X), \"y\": np.array(y)}"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions docs/advanced_examples/LogisticRegressionTraining.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@
"\n",
"# Load the Iris dataset\n",
"X_full, y_full = datasets.load_iris(return_X_y=True)\n",
"X_full = MinMaxScaler(feature_range=[-1, 1]).fit_transform(X_full)\n",
"X_full = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X_full)\n",
"\n",
"# Select petal length and petal width for visualization\n",
"X = X_full[:, 2:4] # Petal length and petal width\n",
Expand Down Expand Up @@ -384,7 +384,7 @@
"X, y = datasets.load_breast_cancer(return_X_y=True)\n",
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y)\n",
"\n",
"scaler = MinMaxScaler(feature_range=[-1, 1])\n",
"scaler = MinMaxScaler(feature_range=(-1, 1))\n",
"x_train = scaler.fit_transform(x_train)\n",
"x_test = scaler.transform(x_test)\n",
"\n",
Expand Down
17 changes: 1 addition & 16 deletions tests/sklearn/test_sklearn_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1344,14 +1344,9 @@ def check_rounding_consistency(
y,
predict_method,
metric,
is_weekly_option,
):
"""Test that Concrete ML without and with rounding are 'equivalent'."""

# Run the test with more samples during weekly CIs
if is_weekly_option:
fhe_test = get_random_samples(x, n_sample=5)

# Check that rounding is enabled
assert os.environ.get("TREES_USE_ROUNDING") == "1", "'TREES_USE_ROUNDING' is not enabled"

Expand All @@ -1361,10 +1356,6 @@ def check_rounding_consistency(
rounded_predict_quantized = predict_method(x, fhe="disable")
rounded_predict_simulate = predict_method(x, fhe="simulate")

# Compute the FHE predictions only during weekly CIs
if is_weekly_option:
rounded_predict_fhe = predict_method(fhe_test, fhe="execute")

with pytest.MonkeyPatch.context() as mp_context:

# Disable rounding
Expand All @@ -1389,11 +1380,6 @@ def check_rounding_consistency(
metric(rounded_predict_quantized, not_rounded_predict_quantized)
metric(rounded_predict_simulate, not_rounded_predict_simulate)

# Compute the FHE predictions only during weekly CIs
if is_weekly_option:
not_rounded_predict_fhe = predict_method(fhe_test, fhe="execute")
metric(rounded_predict_fhe, not_rounded_predict_fhe)

# Check that the maximum bit-width of the circuit with rounding is at most:
# maximum bit-width (of the circuit without rounding) + 2
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4178
Expand Down Expand Up @@ -2076,7 +2062,7 @@ def test_linear_models_have_no_tlu(
# Additional tests for this purpose should be added in future updates
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4179
@pytest.mark.parametrize("model_class, parameters", get_sklearn_tree_models_and_datasets())
@pytest.mark.parametrize("n_bits", [2, 5, 10])
@pytest.mark.parametrize("n_bits", [2, 5, 8])
def test_rounding_consistency_for_regular_models(
model_class,
parameters,
Expand Down Expand Up @@ -2110,7 +2096,6 @@ def test_rounding_consistency_for_regular_models(
y,
predict_method,
metric,
is_weekly_option,
)


Expand Down

0 comments on commit 5b9466c

Please sign in to comment.