Skip to content

Commit

Permalink
Support model metrics for KServe (RHOAIENG-6560) (RHOAIENG-6561)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexcreasy committed Jun 14, 2024
1 parent f5fa031 commit e5ba97d
Show file tree
Hide file tree
Showing 29 changed files with 1,164 additions and 42 deletions.
4 changes: 3 additions & 1 deletion frontend/src/__mocks__/mockConfigMap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@ import { ConfigMapKind } from '~/k8sTypes';
type MockConfigMapType = {
data?: Record<string, string>;
namespace?: string;
name?: string;
};
export const mockConfigMap = ({
data = { key: 'value' },
namespace = 'test-project',
name = 'config-test',
}: MockConfigMapType): ConfigMapKind => ({
kind: 'ConfigMap',
apiVersion: 'v1',
metadata: {
name: 'config-test',
name,
labels: { 'opendatahub.io/dashboard': 'true' },
namespace,
},
Expand Down
113 changes: 113 additions & 0 deletions frontend/src/__mocks__/mockKserveMetricsConfigMap.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import { ConfigMapKind } from '~/k8sTypes';
import { mockConfigMap } from '~/__mocks__/mockConfigMap';

// Options accepted by mockKserveMetricsConfigMap; all optional, defaults are
// supplied by the factory below.
type MockKserveMetricsConfigMapType = {
namespace?: string; // namespace the mock ConfigMap is created in
modelName?: string; // inference service name; ConfigMap is named `${modelName}-metrics-dashboard`
supported?: boolean; // whether the serving runtime supports metrics (serialized to string in data.supported)
config?: string; // raw graph-definition string stored under data.metrics
};

// Well-formed metrics dashboard definition covering all four graph types seen
// in these mocks (REQUEST_COUNT, MEAN_LATENCY, CPU_USAGE, MEMORY_USAGE), each
// backed by one or more PromQL queries. Default config for the mock factory.
export const MOCK_KSERVE_METRICS_CONFIG_1 = `
{
"config": [
{
"title": "Number of incoming requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Successful requests",
"query": "sum(increase(ovms_requests_success{namespace='models',name='mnist'}[5m]))"
},
{
"title": "Failed requests",
"query": "sum(increase(ovms_requests_fail{namespace='models',name='mnist'}[5m]))"
}
]
},
{
"title": "Mean Model Latency",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Mean inference latency",
"query": "sum by (name) (rate(ovms_inference_time_us_sum{namespace='models', name='mnist'}[1m])) / sum by (name) (rate(ovms_inference_time_us_count{namespace='models', name='mnist'}[1m]))"
},
{
"title": "Mean request latency",
"query": "sum by (name) (rate(ovms_request_time_us_sum{name='mnist'}[1m])) / sum by (name) (rate(ovms_request_time_us_count{name='mnist'}[1m]))"
}
]
},
{
"title": "CPU usage",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='models'}* on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='models', workload=~'mnist-predictor-.*', workload_type=~'deployment'}) by (pod)"
}
]
},
{
"title": "Memory usage",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='models', container!='', image!=''} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster='', namespace='models', workload=~'mnist-.*', workload_type=~'deployment'}) by (pod)"
}
]
}
]
}`;

// Deliberately malformed JSON: drives the graceful-failure (unknown error)
// path exercised by the "malformed graph definition" test.
export const MOCK_KSERVE_METRICS_CONFIG_2 =
'{ I am malformed JSON and I am here to ruin your day }';

// Valid definition containing only two graphs (REQUEST_COUNT, MEAN_LATENCY);
// used to verify that exactly the configured charts are rendered.
export const MOCK_KSERVE_METRICS_CONFIG_3 = `
{
"config": [
{
"title": "Number of incoming requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Successful requests",
"query": "sum(increase(ovms_requests_success{namespace='models',name='mnist'}[5m]))"
},
{
"title": "Failed requests",
"query": "sum(increase(ovms_requests_fail{namespace='models',name='mnist'}[5m]))"
}
]
},
{
"title": "Mean Model Latency",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Mean inference latency",
"query": "sum by (name) (rate(ovms_inference_time_us_sum{namespace='models', name='mnist'}[1m])) / sum by (name) (rate(ovms_inference_time_us_count{namespace='models', name='mnist'}[1m]))"
},
{
"title": "Mean request latency",
"query": "sum by (name) (rate(ovms_request_time_us_sum{name='mnist'}[1m])) / sum by (name) (rate(ovms_request_time_us_count{name='mnist'}[1m]))"
}
]
}
]
}`;

/**
 * Builds the mock `<modelName>-metrics-dashboard` ConfigMap that carries the
 * KServe metrics dashboard configuration for an inference service.
 *
 * Defaults produce a supported runtime carrying the full four-graph config
 * (MOCK_KSERVE_METRICS_CONFIG_1) in the `test-project` namespace.
 */
export const mockKserveMetricsConfigMap = ({
  namespace = 'test-project',
  modelName = 'test-inference-service',
  supported = true,
  config = MOCK_KSERVE_METRICS_CONFIG_1,
}: MockKserveMetricsConfigMapType): ConfigMapKind =>
  mockConfigMap({
    // `supported` is stored as the strings 'true'/'false', as ConfigMap data
    // values must be strings.
    data: { metrics: config, supported: String(supported) },
    namespace,
    name: `${modelName}-metrics-dashboard`,
  });
21 changes: 20 additions & 1 deletion frontend/src/__tests__/cypress/cypress/pages/modelMetrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ class ModelMetricsGlobal {
getMetricsChart(title: string) {
return new ModelMetricsChart(() => cy.findByTestId(`metrics-card-${title}`).parents());
}

// All rendered metric chart cards: matches every element whose test id fits
// `metrics-card-<title>`; used to assert the total number of charts shown.
getAllMetricsCharts() {
return cy.findAllByTestId(/metrics-card-.*/);
}
}

class ModelMetricsChart extends Contextual<HTMLTableRowElement> {
Expand All @@ -32,7 +36,7 @@ class ModelMetricsPerformance extends ModelMetricsGlobal {
this.wait();
}

private wait() {
protected wait() {
cy.findByTestId('performance-metrics-loaded');
cy.testA11y();
}
Expand All @@ -42,6 +46,20 @@ class ModelMetricsPerformance extends ModelMetricsGlobal {
}
}

// Page object for the KServe model metrics page. Inherits visit()/wait() and
// the chart getters from ModelMetricsPerformance, adding finders for the
// KServe-specific disabled/unsupported/error state cards.
class ModelMetricsKserve extends ModelMetricsPerformance {
// Card shown when the KServe metrics feature area is disabled.
findKserveAreaDisabledCard() {
return cy.findByTestId('kserve-metrics-disabled');
}

// Card shown when the serving runtime does not support metrics
// (ConfigMap has supported=false).
findUnsupportedRuntimeCard() {
return cy.findByTestId('kserve-metrics-runtime-unsupported');
}

// Card shown for any other failure, e.g. missing ConfigMap (404) or a
// malformed metrics config.
findUnknownErrorCard() {
return cy.findByTestId('kserve-unknown-error');
}
}

class ModelMetricsBias extends ModelMetricsGlobal {
visit(project: string, model: string, disableA11y = false) {
cy.visitWithLogin(`/modelServing/${project}/metrics/${model}/bias`);
Expand Down Expand Up @@ -181,3 +199,4 @@ export const modelMetricsBias = new ModelMetricsBias();
export const serverMetrics = new ServerMetrics();
export const modelMetricsConfigureSection = new ModelMetricsConfigureSection();
export const configureBiasMetricModal = new ConfigureBiasMetricModal();
export const modelMetricsKserve = new ModelMetricsKserve();
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
configureBiasMetricModal,
modelMetricsBias,
modelMetricsConfigureSection,
modelMetricsKserve,
modelMetricsPerformance,
serverMetrics,
} from '~/__tests__/cypress/cypress/pages/modelMetrics';
Expand All @@ -26,6 +27,7 @@ import {
import { ServingRuntimePlatform } from '~/types';
import { mock403Error, mock404Error } from '~/__mocks__/mockK8sStatus';
import {
ConfigMapModel,
InferenceServiceModel,
ProjectModel,
RouteModel,
Expand All @@ -34,10 +36,16 @@ import {
TemplateModel,
TrustyAIApplicationsModel,
} from '~/__tests__/cypress/cypress/utils/models';
import {
MOCK_KSERVE_METRICS_CONFIG_2,
MOCK_KSERVE_METRICS_CONFIG_3,
mockKserveMetricsConfigMap,
} from '~/__mocks__/mockKserveMetricsConfigMap';

type HandlersProps = {
disablePerformanceMetrics?: boolean;
disableBiasMetrics?: boolean;
disableKServeMetrics?: boolean;
servingRuntimes?: ServingRuntimeKind[];
inferenceServices?: InferenceServiceKind[];
hasServingData: boolean;
Expand All @@ -50,6 +58,7 @@ type HandlersProps = {
const initIntercepts = ({
disablePerformanceMetrics,
disableBiasMetrics,
disableKServeMetrics,
servingRuntimes = [mockServingRuntimeK8sResource({})],
inferenceServices = [mockInferenceServiceK8sResource({ isModelMesh: true })],
hasServingData = false,
Expand All @@ -69,6 +78,7 @@ const initIntercepts = ({
mockDashboardConfig({
disableBiasMetrics,
disablePerformanceMetrics,
disableKServeMetrics,
}),
);

Expand Down Expand Up @@ -614,3 +624,136 @@ describe('Model Metrics', () => {
});
});
});

describe('KServe performance metrics', () => {
it('should inform user when area disabled', () => {
initIntercepts({
disableBiasMetrics: false,
disablePerformanceMetrics: false,
disableKServeMetrics: true,
hasServingData: false,
hasBiasData: false,
inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })],
});
modelMetricsKserve.visit('test-project', 'test-inference-service');
modelMetricsKserve.findKserveAreaDisabledCard().should('be.visible');
});

it('should show error when ConfigMap is missing', () => {
initIntercepts({
disableBiasMetrics: false,
disablePerformanceMetrics: false,
disableKServeMetrics: false,
hasServingData: true,
hasBiasData: false,
inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })],
});

cy.interceptK8s(
{
model: ConfigMapModel,
ns: 'test-project',
name: 'test-inference-service-metrics-dashboard',
},
{ statusCode: 404, body: mock404Error({}) },
);

modelMetricsKserve.visit('test-project', 'test-inference-service');
modelMetricsKserve.findUnknownErrorCard().should('be.visible');
});

it('should inform user when serving runtime is unsupported', () => {
initIntercepts({
disableBiasMetrics: false,
disablePerformanceMetrics: false,
disableKServeMetrics: false,
hasServingData: true,
hasBiasData: false,
inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })],
});

cy.interceptK8s(ConfigMapModel, mockKserveMetricsConfigMap({ supported: false }));

modelMetricsKserve.visit('test-project', 'test-inference-service');
modelMetricsKserve.findUnsupportedRuntimeCard().should('be.visible');
});

it('should handle a malformed graph definition gracefully', () => {
initIntercepts({
disableBiasMetrics: false,
disablePerformanceMetrics: false,
disableKServeMetrics: false,
hasServingData: true,
hasBiasData: false,
inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })],
});

cy.interceptK8s(
ConfigMapModel,
mockKserveMetricsConfigMap({ config: MOCK_KSERVE_METRICS_CONFIG_2 }),
);

modelMetricsKserve.visit('test-project', 'test-inference-service');
modelMetricsKserve.findUnknownErrorCard().should('be.visible');
});

it('should display only 2 graphs, when the config specifies', () => {
initIntercepts({
disableBiasMetrics: false,
disablePerformanceMetrics: false,
disableKServeMetrics: false,
hasServingData: true,
hasBiasData: false,
inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })],
});

cy.interceptK8s(
ConfigMapModel,
mockKserveMetricsConfigMap({ config: MOCK_KSERVE_METRICS_CONFIG_3 }),
);

modelMetricsKserve.visit('test-project', 'test-inference-service');
modelMetricsKserve.getMetricsChart('Number of incoming requests').shouldHaveData();
modelMetricsKserve.getMetricsChart('Mean Model Latency').shouldHaveData();
modelMetricsKserve.getAllMetricsCharts().should('have.length', 2);
});

it('charts should show data when serving data is available', () => {
initIntercepts({
disableBiasMetrics: false,
disablePerformanceMetrics: false,
disableKServeMetrics: false,
hasServingData: true,
hasBiasData: false,
inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })],
});

cy.interceptK8s(ConfigMapModel, mockKserveMetricsConfigMap({ supported: true }));

modelMetricsKserve.visit('test-project', 'test-inference-service');
modelMetricsKserve.getMetricsChart('Number of incoming requests').shouldHaveData();
modelMetricsKserve.getMetricsChart('Mean Model Latency').shouldHaveData();
modelMetricsKserve.getMetricsChart('CPU usage').shouldHaveData();
modelMetricsKserve.getMetricsChart('Memory usage').shouldHaveData();
modelMetricsKserve.getAllMetricsCharts().should('have.length', 4);
});

it('charts should show empty state when no serving data is available', () => {
initIntercepts({
disableBiasMetrics: false,
disablePerformanceMetrics: false,
disableKServeMetrics: false,
hasServingData: false,
hasBiasData: false,
inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })],
});

cy.interceptK8s(ConfigMapModel, mockKserveMetricsConfigMap({ supported: true }));

modelMetricsKserve.visit('test-project', 'test-inference-service');
modelMetricsKserve.getMetricsChart('Number of incoming requests').shouldHaveNoData();
modelMetricsKserve.getMetricsChart('Mean Model Latency').shouldHaveNoData();
modelMetricsKserve.getMetricsChart('CPU usage').shouldHaveNoData();
modelMetricsKserve.getMetricsChart('Memory usage').shouldHaveNoData();
});
});
1 change: 1 addition & 0 deletions frontend/src/api/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ export * from './pipelines/k8s';
export * from './prometheus/pvcs';
export * from './prometheus/serving';
export * from './prometheus/distributedWorkloads';
export * from './prometheus/kservePerformanceMetrics';

// Network error handling
export * from './errorUtils';
Expand Down
10 changes: 8 additions & 2 deletions frontend/src/api/k8s/__tests__/configMaps.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,11 @@ describe('getConfigMap', () => {
expect(result).toStrictEqual(configMapMock);
expect(k8sGetResourceMock).toHaveBeenCalledTimes(1);
expect(k8sGetResourceMock).toHaveBeenCalledWith({
fetchOptions: {
requestInit: {},
},
model: ConfigMapModel,
queryOptions: { name: configMapName, ns: namespace },
queryOptions: { name: configMapName, ns: namespace, queryParams: {} },
});
});

Expand All @@ -70,8 +73,11 @@ describe('getConfigMap', () => {
await expect(getConfigMap(namespace, configMapName)).rejects.toThrow('error1');
expect(k8sGetResourceMock).toHaveBeenCalledTimes(1);
expect(k8sGetResourceMock).toHaveBeenCalledWith({
fetchOptions: {
requestInit: {},
},
model: ConfigMapModel,
queryOptions: { name: configMapName, ns: namespace },
queryOptions: { name: configMapName, ns: namespace, queryParams: {} },
});
});
});
Expand Down
Loading

0 comments on commit e5ba97d

Please sign in to comment.