Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AutoMLx internal explainability mode #1025

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
2 changes: 2 additions & 0 deletions ads/opctl/operator/lowcode/forecast/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@ class SpeedAccuracyMode(str, metaclass=ExtendedEnumMeta):
HIGH_ACCURACY = "HIGH_ACCURACY"
BALANCED = "BALANCED"
FAST_APPROXIMATE = "FAST_APPROXIMATE"
AUTOMLX = "AUTOMLX"
codeloop marked this conversation as resolved.
Show resolved Hide resolved
ratio = {}
ratio[HIGH_ACCURACY] = 1 # 100 % data used for generating explanations
ratio[BALANCED] = 0.5 # 50 % data used for generating explanations
ratio[FAST_APPROXIMATE] = 0 # constant
ratio[AUTOMLX] = 0 # constant


class SupportedMetrics(str, metaclass=ExtendedEnumMeta):
Expand Down
98 changes: 89 additions & 9 deletions ads/opctl/operator/lowcode/forecast/model/automlx.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from ads.opctl.operator.lowcode.forecast.const import (
AUTOMLX_METRIC_MAP,
ForecastOutputColumns,
SpeedAccuracyMode,
SupportedModels,
)
from ads.opctl.operator.lowcode.forecast.utils import _label_encode_dataframe
Expand Down Expand Up @@ -241,18 +242,18 @@ def _generate_report(self):
# If the key is present, call the "explain_model" method
self.explain_model()

# Convert the global explanation data to a DataFrame
global_explanation_df = pd.DataFrame(self.global_explanation)
global_explanation_section = None
if self.spec.explanations_accuracy_mode != SpeedAccuracyMode.AUTOMLX:
# Convert the global explanation data to a DataFrame
global_explanation_df = pd.DataFrame(self.global_explanation)

self.formatted_global_explanation = (
global_explanation_df / global_explanation_df.sum(axis=0) * 100
)
self.formatted_global_explanation = (
self.formatted_global_explanation.rename(
self.formatted_global_explanation = (
global_explanation_df / global_explanation_df.sum(axis=0) * 100
)
self.formatted_global_explanation = self.formatted_global_explanation.rename(
{self.spec.datetime_column.name: ForecastOutputColumns.DATE},
axis=1,
)
)

aggregate_local_explanations = pd.DataFrame()
for s_id, local_ex_df in self.local_explanation.items():
Expand Down Expand Up @@ -293,8 +294,11 @@ def _generate_report(self):
)

# Append the global explanation text and section to the "other_sections" list
if global_explanation_section:
other_sections.append(global_explanation_section)

# Append the local explanation text and section to the "other_sections" list
other_sections = other_sections + [
global_explanation_section,
codeloop marked this conversation as resolved.
Show resolved Hide resolved
local_explanation_section,
]
except Exception as e:
Expand Down Expand Up @@ -375,3 +379,79 @@ def _custom_predict_automlx(self, data):
return self.models.get(self.series_id).forecast(
X=data_temp, periods=data_temp.shape[0]
)[self.series_id]

@runtime_dependency(
module="automlx",
err_msg=(
"Please run `python3 -m pip install automlx` to install the required dependencies for model explanation."
),
)
def explain_model(self):
"""
Generates explanations for the model using the AutoMLx library.

Parameters
----------
None

Returns
-------
None

Notes
-----
This function works by generating local explanations for each series in the dataset.
It uses the ``MLExplainer`` class from the AutoMLx library to generate feature attributions
for each series. The feature attributions are then stored in the ``self.local_explanation`` dictionary.

If the accuracy mode is set to AutoMLX, it uses the AutoMLx library to generate explanations.
Otherwise, it falls back to the default explanation generation method.
"""
import automlx

# Loop through each series in the dataset
for s_id, data_i in self.datasets.get_data_by_series(
include_horizon=False
).items():
try:
if self.spec.explanations_accuracy_mode == SpeedAccuracyMode.AUTOMLX:
# Use the MLExplainer class from AutoMLx to generate explanations
explainer = automlx.MLExplainer(
self.models[s_id],
self.datasets.additional_data.get_data_for_series(series_id=s_id)
.drop(self.spec.datetime_column.name, axis=1)
.head(-self.spec.horizon)
if self.spec.additional_data
else None,
pd.DataFrame(data_i[self.spec.target_column]),
task="forecasting",
)

# Generate explanations for the forecast
explanations = explainer.explain_prediction(
X=self.datasets.additional_data.get_data_for_series(series_id=s_id)
.drop(self.spec.datetime_column.name, axis=1)
.tail(self.spec.horizon)
if self.spec.additional_data
else None,
forecast_timepoints=list(range(self.spec.horizon + 1)),
)

# Convert the explanations to a DataFrame
explanations_df = pd.concat(
[exp.to_dataframe() for exp in explanations]
)
explanations_df["row"] = explanations_df.groupby("Feature").cumcount()
explanations_df = explanations_df.pivot(
index="row", columns="Feature", values="Attribution"
)
explanations_df = explanations_df.reset_index(drop=True)

# Store the explanations in the local_explanation dictionary
self.local_explanation[s_id] = explanations_df
else:
# Fall back to the default explanation generation method
super().explain_model()
except Exception as e:
logger.warning(f"Failed to generate explanations for series {s_id} with error: {e}.")
logger.debug(f"Full Traceback: {traceback.format_exc()}")
42 changes: 36 additions & 6 deletions ads/opctl/operator/lowcode/forecast/model/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
SpeedAccuracyMode,
SupportedMetrics,
SupportedModels,
BACKTEST_REPORT_NAME
BACKTEST_REPORT_NAME,
)
from ..operator_config import ForecastOperatorConfig, ForecastOperatorSpec
from .forecast_datasets import ForecastDatasets
Expand Down Expand Up @@ -266,7 +266,11 @@ def generate_report(self):
output_dir = self.spec.output_directory.url
file_path = f"{output_dir}/{BACKTEST_REPORT_NAME}"
if self.spec.model == AUTO_SELECT:
backtest_sections.append(rc.Heading("Auto-Select Backtesting and Performance Metrics", level=2))
backtest_sections.append(
rc.Heading(
"Auto-Select Backtesting and Performance Metrics", level=2
)
)
if not os.path.exists(file_path):
failure_msg = rc.Text(
"auto-select could not be executed. Please check the "
Expand All @@ -275,15 +279,23 @@ def generate_report(self):
backtest_sections.append(failure_msg)
else:
backtest_stats = pd.read_csv(file_path)
model_metric_map = backtest_stats.drop(columns=['metric', 'backtest'])
average_dict = {k: round(v, 4) for k, v in model_metric_map.mean().to_dict().items()}
model_metric_map = backtest_stats.drop(
columns=["metric", "backtest"]
)
average_dict = {
k: round(v, 4)
for k, v in model_metric_map.mean().to_dict().items()
}
best_model = min(average_dict, key=average_dict.get)
summary_text = rc.Text(
f"Overall, the average {self.spec.metric} scores for the models are {average_dict}, with"
f" {best_model} being identified as the top-performing model during backtesting.")
f" {best_model} being identified as the top-performing model during backtesting."
)
backtest_table = rc.DataTable(backtest_stats, index=True)
liner_plot = get_auto_select_plot(backtest_stats)
backtest_sections.extend([backtest_table, summary_text, liner_plot])
backtest_sections.extend(
[backtest_table, summary_text, liner_plot]
)

forecast_plots = []
if len(self.forecast_output.list_series_ids()) > 0:
Expand Down Expand Up @@ -646,6 +658,13 @@ def _save_model(self, output_dir, storage_options):
storage_options=storage_options,
)

def _validate_automlx_explanation_mode(self):
if self.spec.model != SupportedModels.AutoMLX and self.spec.explanations_accuracy_mode == SpeedAccuracyMode.AUTOMLX:
raise ValueError(
"AUTOMLX explanation accuracy mode is only supported for AutoMLX models. "
"Please select mode other than AUTOMLX from the available explanations_accuracy_mode options"
)

@runtime_dependency(
module="shap",
err_msg=(
Expand Down Expand Up @@ -674,6 +693,9 @@ def explain_model(self):
)
ratio = SpeedAccuracyMode.ratio[self.spec.explanations_accuracy_mode]

# validate that the AUTOMLX explanation mode is only used with the AutoMLX model
self._validate_automlx_explanation_mode()

for s_id, data_i in self.datasets.get_data_by_series(
include_horizon=False
).items():
Expand Down Expand Up @@ -708,6 +730,14 @@ def explain_model(self):
logger.warn(
"No explanations generated. Ensure that additional data has been provided."
)
elif (
self.spec.model == SupportedModels.AutoMLX
and self.spec.explanations_accuracy_mode
== SpeedAccuracyMode.AUTOMLX
):
logger.warning(
"Global explanations not available for AutoMLX models with inherent explainability"
)
else:
self.global_explanation[s_id] = dict(
zip(
Expand Down
1 change: 1 addition & 0 deletions ads/opctl/operator/lowcode/forecast/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ spec:
- HIGH_ACCURACY
- BALANCED
- FAST_APPROXIMATE
- AUTOMLX

generate_report:
type: boolean
Expand Down
23 changes: 10 additions & 13 deletions tests/operators/forecast/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,8 @@ def test_all_series_failure(model):
yaml_i["spec"]["preprocessing"] = {"enabled": True, "steps": preprocessing_steps}
if yaml_i["spec"].get("additional_data") is not None and model != "autots":
yaml_i["spec"]["generate_explanations"] = True
else:
yaml_i["spec"]["generate_explanations"] = False
if model == "autots":
yaml_i["spec"]["model_kwargs"] = {"model_list": "superfast"}
if model == "automlx":
Expand Down Expand Up @@ -672,6 +674,7 @@ def test_arima_automlx_errors(operator_setup, model):
yaml_i["spec"]["model_kwargs"] = {"model_list": "superfast"}
if model == "automlx":
yaml_i["spec"]["model_kwargs"] = {"time_budget": 1}
yaml_i["spec"]["explanations_accuracy_mode"] = "AUTOMLX"
codeloop marked this conversation as resolved.
Show resolved Hide resolved

run_yaml(
tmpdirname=tmpdirname,
Expand Down Expand Up @@ -699,21 +702,15 @@ def test_arima_automlx_errors(operator_setup, model):
in error_content["13"]["error"]
), "Error message mismatch"

if model not in ["autots", "automlx"]: # , "lgbforecast"
global_fn = f"{tmpdirname}/results/global_explanation.csv"
assert os.path.exists(
global_fn
), f"Global explanation file not found at {report_path}"
if model not in ["autots"]: # , "lgbforecast"
if yaml_i["spec"].get("explanations_accuracy_mode") != "AUTOMLX":
global_fn = f"{tmpdirname}/results/global_explanation.csv"
assert os.path.exists(global_fn), f"Global explanation file not found at {report_path}"
assert not pd.read_csv(global_fn, index_col=0).empty

local_fn = f"{tmpdirname}/results/local_explanation.csv"
assert os.path.exists(
local_fn
), f"Local explanation file not found at {report_path}"

glb_expl = pd.read_csv(global_fn, index_col=0)
loc_expl = pd.read_csv(local_fn)
assert not glb_expl.empty
assert not loc_expl.empty
assert os.path.exists(local_fn), f"Local explanation file not found at {report_path}"
assert not pd.read_csv(local_fn).empty


def test_smape_error():
Expand Down
Loading