Skip to content
This repository has been archived by the owner on Mar 31, 2024. It is now read-only.

Commit

Permalink
feat(slo): create SLO burn rate rule (elastic#145464)
Browse files Browse the repository at this point in the history
  • Loading branch information
kdelemme authored Nov 18, 2022
1 parent 92fb6fc commit 957d5a5
Show file tree
Hide file tree
Showing 11 changed files with 622 additions and 18 deletions.
1 change: 1 addition & 0 deletions x-pack/plugins/observability/common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
* 2.0.
*/

// Identifier string for the SLO burn rate rule — presumably the rule type id it is registered under; confirm at the registration site.
export const SLO_BURN_RATE_RULE_ID = 'slo.rules.burnRate';
// Alert status value 'all' — presumably the "no status filter" option; confirm at call sites.
export const ALERT_STATUS_ALL = 'all';
9 changes: 9 additions & 0 deletions x-pack/plugins/observability/server/common/constants.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

// Feature id string for the observability plugin — NOTE(review): usage is not visible here; confirm where it is consumed.
export const OBSERVABILITY_FEATURE_ID = 'observability';
// Registration context string for SLO rules — presumably handed to the rule registry when the rule data client is set up; confirm.
export const RULE_REGISTRATION_CONTEXT = 'observability.slo';
23 changes: 22 additions & 1 deletion x-pack/plugins/observability/server/domain/models/duration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,27 @@ class Duration {
}
}

/**
 * Converts the textual suffix of a duration into its `DurationUnit` member.
 *
 * Recognised suffixes are case-sensitive: `m` (minute), `h` (hour), `d` (day),
 * `w` (week), `M` (month), `Q` (quarter) and `y` (year).
 *
 * @param unit - the unit suffix to convert
 * @returns the `DurationUnit` matching the suffix
 * @throws Error('invalid duration unit') when the suffix is not one of the above
 */
const toDurationUnit = (unit: string): DurationUnit => {
  // A Map (rather than a plain object) so that inherited keys such as
  // 'toString' can never be mistaken for a valid unit.
  const unitsBySuffix = new Map<string, DurationUnit>([
    ['m', DurationUnit.Minute],
    ['h', DurationUnit.Hour],
    ['d', DurationUnit.Day],
    ['w', DurationUnit.Week],
    ['M', DurationUnit.Month],
    ['Q', DurationUnit.Quarter],
    ['y', DurationUnit.Year],
  ]);

  const durationUnit = unitsBySuffix.get(unit);
  if (durationUnit === undefined) {
    throw new Error('invalid duration unit');
  }
  return durationUnit;
};

const toMomentUnitOfTime = (unit: DurationUnit): moment.unitOfTime.Diff => {
switch (unit) {
case DurationUnit.Minute:
Expand All @@ -60,4 +81,4 @@ const toMomentUnitOfTime = (unit: DurationUnit): moment.unitOfTime.Diff => {
}
};

export { Duration, DurationUnit, toMomentUnitOfTime };
export { Duration, DurationUnit, toMomentUnitOfTime, toDurationUnit };
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { PluginSetupContract } from '@kbn/alerting-plugin/server';
import { Logger } from '@kbn/core/server';
import { createLifecycleExecutor, IRuleDataClient } from '@kbn/rule-registry-plugin/server';
import { sloBurnRateRuleType } from './slo_burn_rate';

/**
 * Registers the SLO burn rate rule type with the alerting plugin.
 *
 * @param alertingPlugin - alerting plugin setup contract the rule type is registered on
 * @param logger - parent logger; the lifecycle executor receives its 'rules' child logger
 * @param ruleDataClient - rule data client handed to the lifecycle executor factory
 */
export function registerRuleTypes(
  alertingPlugin: PluginSetupContract,
  logger: Logger,
  ruleDataClient: IRuleDataClient
) {
  const rulesLogger = logger.get('rules');
  const lifecycleExecutorFactory = createLifecycleExecutor(rulesLogger, ruleDataClient);
  const burnRateRuleType = sloBurnRateRuleType(lifecycleExecutorFactory);

  alertingPlugin.registerType(burnRateRuleType);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import uuid from 'uuid';
import { IUiSettingsClient, SavedObjectsClientContract } from '@kbn/core/server';
import {
ElasticsearchClientMock,
elasticsearchServiceMock,
loggingSystemMock,
savedObjectsClientMock,
} from '@kbn/core/server/mocks';
import { LifecycleAlertService, LifecycleAlertServices } from '@kbn/rule-registry-plugin/server';
import { PublicAlertFactory } from '@kbn/alerting-plugin/server/alert/create_alert_factory';
import { ISearchStartSearchSource } from '@kbn/data-plugin/public';
import { MockedLogger } from '@kbn/logging-mocks';
import { SanitizedRuleConfig } from '@kbn/alerting-plugin/common';
import { Alert, RuleExecutorServices } from '@kbn/alerting-plugin/server';
import {
ALERT_EVALUATION_THRESHOLD,
ALERT_EVALUATION_VALUE,
ALERT_REASON,
} from '@kbn/rule-data-utils';
import {
BurnRateAlertContext,
BurnRateAlertState,
BurnRateAllowedActionGroups,
BurnRateRuleParams,
FIRED_ACTION,
AlertStates,
getRuleExecutor,
} from './executor';
import { aStoredSLO, createSLO } from '../../../services/slo/fixtures/slo';
import { SLO } from '../../../domain/models';

// Search response envelope with no hits, shared by every mocked search in this file;
// generateEsResponse() spreads it and adds the aggregations on top.
const commonEsResponse = {
took: 100,
timed_out: false,
_shards: {
total: 0,
successful: 0,
skipped: 0,
failed: 0,
},
hits: {
hits: [],
},
};

// Threshold handed to the rule params in every test below.
const BURN_RATE_THRESHOLD = 2;
// Burn rates 0.01 above / below the threshold, used to put a window on either side of it.
const BURN_RATE_ABOVE_THRESHOLD = BURN_RATE_THRESHOLD + 0.01;
const BURN_RATE_BELOW_THRESHOLD = BURN_RATE_THRESHOLD - 0.01;

/**
 * Unit tests for the burn rate rule executor returned by `getRuleExecutor()`.
 *
 * Every test stores an SLO behind the saved objects client mock, makes the
 * Elasticsearch client mock return aggregations for a chosen short/long window
 * burn rate, runs the executor once, and then checks which alerts were
 * scheduled or recovered.
 */
describe('BurnRateRuleExecutor', () => {
  let esClientMock: ElasticsearchClientMock;
  let soClientMock: jest.Mocked<SavedObjectsClientContract>;
  let loggerMock: jest.Mocked<MockedLogger>;
  let alertWithLifecycleMock: jest.MockedFn<LifecycleAlertService>;
  let alertFactoryMock: jest.Mocked<
    PublicAlertFactory<BurnRateAlertState, BurnRateAlertContext, BurnRateAllowedActionGroups>
  >;
  // NOTE: the two mocks below are never assigned in this suite, so the
  // services object carries `undefined` for both of them.
  let searchSourceClientMock: jest.Mocked<ISearchStartSearchSource>;
  let uiSettingsClientMock: jest.Mocked<IUiSettingsClient>;
  let servicesMock: RuleExecutorServices<
    BurnRateAlertState,
    BurnRateAlertContext,
    BurnRateAllowedActionGroups
  > &
    LifecycleAlertServices<BurnRateAlertState, BurnRateAlertContext, BurnRateAllowedActionGroups>;

  // Reason text expected on the alert document and in the action context when
  // both windows sit exactly on the threshold.
  const reasonAtThreshold =
    'The burn rate for the past 1h is 2 and for the past 5m is 2. Alert when above 2 for both windows';

  beforeEach(() => {
    esClientMock = elasticsearchServiceMock.createElasticsearchClient();
    soClientMock = savedObjectsClientMock.create();
    loggerMock = loggingSystemMock.createLogger();
    alertWithLifecycleMock = jest.fn();
    alertFactoryMock = {
      create: jest.fn(),
      done: jest.fn(),
      alertLimit: { getValue: jest.fn(), setLimitReached: jest.fn() },
    };
    servicesMock = {
      alertWithLifecycle: alertWithLifecycleMock,
      alertFactory: alertFactoryMock,
      savedObjectsClient: soClientMock,
      scopedClusterClient: { asCurrentUser: esClientMock, asInternalUser: esClientMock },
      searchSourceClient: searchSourceClientMock,
      uiSettingsClient: uiSettingsClientMock,
      shouldWriteAlerts: jest.fn(),
      shouldStopExecution: jest.fn(),
      getAlertStartedDate: jest.fn(),
      getAlertUuid: jest.fn(),
      getAlertByAlertUuid: jest.fn(),
    };
  });

  /**
   * Creates an SLO with a 0.9 target, stores it behind the saved objects mock
   * and makes every search resolve with the given window burn rates.
   */
  const givenSloWithBurnRates = ({
    shortWindow,
    longWindow,
  }: {
    shortWindow: number;
    longWindow: number;
  }) => {
    const slo = createSLO({ objective: { target: 0.9 } });
    soClientMock.get.mockResolvedValue(aStoredSLO(slo));
    esClientMock.search.mockResolvedValue(generateEsResponse(slo, shortWindow, longWindow));
    return slo;
  };

  /** Builds a fresh executor and runs it once for `slo` with the suite's threshold. */
  const executeRuleFor = (slo: SLO) =>
    getRuleExecutor()({
      params: someRuleParams({ sloId: slo.id, threshold: BURN_RATE_THRESHOLD }),
      startedAt: new Date(),
      services: servicesMock,
      executionId: 'irrelevant',
      logger: loggerMock,
      previousStartedAt: null,
      rule: {} as SanitizedRuleConfig,
      spaceId: 'irrelevant',
      state: {},
    });

  it('does not schedule an alert when both windows burn rates are below the threshold', async () => {
    const slo = givenSloWithBurnRates({
      shortWindow: BURN_RATE_BELOW_THRESHOLD,
      longWindow: BURN_RATE_BELOW_THRESHOLD,
    });
    alertFactoryMock.done.mockReturnValueOnce({ getRecoveredAlerts: () => [] });

    await executeRuleFor(slo);

    expect(alertWithLifecycleMock).not.toBeCalled();
  });

  it('does not schedule an alert when the long window burn rate is below the threshold', async () => {
    const slo = givenSloWithBurnRates({
      shortWindow: BURN_RATE_ABOVE_THRESHOLD,
      longWindow: BURN_RATE_BELOW_THRESHOLD,
    });
    alertFactoryMock.done.mockReturnValueOnce({ getRecoveredAlerts: () => [] });

    await executeRuleFor(slo);

    expect(alertWithLifecycleMock).not.toBeCalled();
  });

  it('does not schedule an alert when the short window burn rate is below the threshold', async () => {
    const slo = givenSloWithBurnRates({
      shortWindow: BURN_RATE_BELOW_THRESHOLD,
      longWindow: BURN_RATE_ABOVE_THRESHOLD,
    });
    alertFactoryMock.done.mockReturnValueOnce({ getRecoveredAlerts: () => [] });

    await executeRuleFor(slo);

    expect(alertWithLifecycleMock).not.toBeCalled();
  });

  it('schedules an alert when both windows burn rate have reached the threshold', async () => {
    const slo = givenSloWithBurnRates({
      shortWindow: BURN_RATE_THRESHOLD,
      longWindow: BURN_RATE_THRESHOLD,
    });
    const alertMock: Partial<Alert> = {
      scheduleActions: jest.fn(),
      replaceState: jest.fn(),
    };
    alertWithLifecycleMock.mockImplementation(() => alertMock as any);
    alertFactoryMock.done.mockReturnValueOnce({ getRecoveredAlerts: () => [] });

    await executeRuleFor(slo);

    expect(alertWithLifecycleMock).toBeCalledWith({
      id: `alert-${slo.id}-${slo.revision}`,
      fields: {
        [ALERT_REASON]: reasonAtThreshold,
        [ALERT_EVALUATION_THRESHOLD]: 2,
        [ALERT_EVALUATION_VALUE]: 2,
      },
    });
    expect(alertMock.scheduleActions).toBeCalledWith(
      FIRED_ACTION.id,
      expect.objectContaining({
        longWindow: { burnRate: 2, duration: '1h' },
        shortWindow: { burnRate: 2, duration: '5m' },
        threshold: 2,
        reason: reasonAtThreshold,
      })
    );
    expect(alertMock.replaceState).toBeCalledWith({ alertState: AlertStates.ALERT });
  });

  it('sets the context on the recovered alerts', async () => {
    const slo = givenSloWithBurnRates({
      shortWindow: BURN_RATE_BELOW_THRESHOLD,
      longWindow: BURN_RATE_ABOVE_THRESHOLD,
    });
    const alertMock: Partial<Alert> = {
      setContext: jest.fn(),
    };
    alertFactoryMock.done.mockReturnValueOnce({ getRecoveredAlerts: () => [alertMock] as any });

    await executeRuleFor(slo);

    expect(alertWithLifecycleMock).not.toBeCalled();
    expect(alertMock.setContext).toBeCalledWith(
      expect.objectContaining({
        longWindow: { burnRate: 2.01, duration: '1h' },
        shortWindow: { burnRate: 1.99, duration: '5m' },
        threshold: 2,
      })
    );
  });
});

/**
 * Builds burn rate rule params for the tests.
 *
 * Defaults are a fresh `uuid()` as `sloId`, a threshold of 2, a 1h long window
 * and a 5m short window; any field present in `params` replaces its default.
 *
 * @param params - fields to override on top of the defaults
 * @returns the complete rule params
 */
function someRuleParams(params: Partial<BurnRateRuleParams> = {}): BurnRateRuleParams {
  const defaults: BurnRateRuleParams = {
    sloId: uuid(),
    threshold: 2,
    longWindow: { duration: 1, unit: 'h' },
    shortWindow: { duration: 5, unit: 'm' },
  };

  return { ...defaults, ...params };
}

/**
 * Builds a mocked search response whose `SHORT_WINDOW` and `LONG_WINDOW`
 * aggregations each hold a single bucket producing the requested burn rate.
 *
 * @param slo - SLO whose objective target the buckets are computed against
 * @param shortWindowBurnRate - burn rate the short window bucket should yield
 * @param longWindowBurnRate - burn rate the long window bucket should yield
 * @returns `commonEsResponse` extended with the two window aggregations
 */
function generateEsResponse(slo: SLO, shortWindowBurnRate: number, longWindowBurnRate: number) {
  const shortWindowBucket = generateBucketForBurnRate(slo, shortWindowBurnRate);
  const longWindowBucket = generateBucketForBurnRate(slo, longWindowBurnRate);

  const aggregations = {
    SHORT_WINDOW: { buckets: [shortWindowBucket] },
    LONG_WINDOW: { buckets: [longWindowBucket] },
  };

  return { ...commonEsResponse, aggregations };
}

/**
 * Builds an aggregation bucket with 100 total events and the number of good
 * events that corresponds to `burnRate` for the SLO's objective target.
 *
 * The good ratio is `1 - burnRate + target * burnRate`, i.e. the inverse of
 * `burnRate = (1 - good / total) / (1 - target)` — presumably the formula the
 * executor applies; confirm against the executor.
 *
 * @param slo - SLO providing `objective.target`
 * @param burnRate - burn rate the bucket should yield
 * @returns a bucket shaped as `{ good: { value }, total: { value } }`
 */
function generateBucketForBurnRate(slo: SLO, burnRate: number) {
  const total = 100;
  const { target } = slo.objective;
  const goodRatio = 1 - burnRate + target * burnRate;

  return {
    good: { value: total * goodRatio },
    total: { value: total },
  };
}
Loading

0 comments on commit 957d5a5

Please sign in to comment.