Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: retry delay strategy #871

Merged
merged 13 commits into from
May 1, 2024
4 changes: 3 additions & 1 deletion docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@
### Added

- chore: adds required `npm audit` check to PRs
- new `HttpAgent` option: `backoffStrategy` - allows you to set a custom delay strategy for retries. The default is a newly exported `exponentialBackoff`, but you can pass your own function to customize the delay between retries.

### Changed

- chore: upgrades github actions to v4
- fix: retry logic now includes delays with exponential backoff matching the dfx strategy. Retries should no longer happen too quickly for the replica to catch up.

## [1.2.1] - 2024-04-25

### Changed
### Added

- feat: make `IdbStorage` `get/set` methods generic
- chore: add context to errors thrown when failing to decode CBOR values.
Expand Down
3 changes: 1 addition & 2 deletions e2e/node/basic/mainnet.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,9 @@ const createWhoamiActor = async (identity: Identity) => {
describe('certified query', () => {
it('should verify a query certificate', async () => {
const actor = await createWhoamiActor(new AnonymousIdentity());

const result = await actor.whoami();
expect(Principal.from(result)).toBeInstanceOf(Principal);
}, 100_000);
}, 10_000);
it('should verify lots of query certificates', async () => {
let count = 1;
const identities = Array.from({ length: 20 }).map(() => {
Expand Down
3 changes: 3 additions & 0 deletions e2e/node/basic/watermark.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ test('replay attack', async () => {
const actor = await createActor({
verifyQuerySignatures: true,
fetch: fetchProxy.fetch.bind(fetchProxy),
backoffStrategy: () => ({
next: () => 0,
}),
});

const agent = Actor.agentOf(actor) as HttpAgent;
Expand Down
2 changes: 1 addition & 1 deletion packages/agent/src/actor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import { CallRequest, SubmitRequestType, UnSigned } from './agent/http/types';
import * as cbor from './cbor';
import { requestIdOf } from './request_id';
import * as pollingImport from './polling';
import { Actor, ActorConfig } from './actor';
import { ActorConfig } from './actor';

const importActor = async (mockUpdatePolling?: () => void) => {
jest.dontMock('./polling');
Expand Down
9 changes: 7 additions & 2 deletions packages/agent/src/agent/http/http.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import { AnonymousIdentity, SignIdentity, toHex } from '../..';
import { Ed25519KeyIdentity } from '@dfinity/identity';
import { AgentError } from '../../errors';
import { AgentHTTPResponseError } from './errors';
import { ExponentialBackoff } from '../../polling/backoff';
const { window } = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`);
window.fetch = global.fetch;
(global as any).window = window;
Expand Down Expand Up @@ -46,7 +47,6 @@ beforeEach(() => {
global.fetch = originalFetch;
});


afterEach(() => {
global.Date.now = originalDateNowFn;
global.window = originalWindow;
Expand Down Expand Up @@ -781,11 +781,16 @@ describe('default host', () => {
});

test('retry requests that fail due to a network failure', async () => {
jest.useRealTimers();
const mockFetch: jest.Mock = jest.fn(() => {
throw new Error('Network failure');
});

const agent = new HttpAgent({ host: HTTP_AGENT_HOST, fetch: mockFetch });
const agent = new HttpAgent({
host: HTTP_AGENT_HOST,
fetch: mockFetch,
});

try {
await agent.call(Principal.managementCanister(), {
methodName: 'test',
Expand Down
160 changes: 116 additions & 44 deletions packages/agent/src/agent/http/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import { ExpirableMap } from '../../utils/expirableMap';
import { Ed25519PublicKey } from '../../public_key';
import { decodeTime } from '../../utils/leb';
import { ObservableLog } from '../../observable';

import { BackoffStrategy, BackoffStrategyFactory, ExponentialBackoff } from '../../polling/backoff';
export * from './transforms';
export { Nonce, makeNonce } from './types';

Expand Down Expand Up @@ -124,6 +124,10 @@ export interface HttpAgentOptions {
* @default 3
*/
retryTimes?: number;
/**
* The strategy to use for backoff when retrying requests
*/
backoffStrategy?: BackoffStrategyFactory;
/**
* Whether the agent should verify signatures signed by node keys on query responses. Increases security, but adds overhead and must make a separate request to cache the node keys for the canister's subnet.
* @default true
Expand Down Expand Up @@ -189,7 +193,8 @@ export class HttpAgent implements Agent {
private readonly _host: URL;
private readonly _credentials: string | undefined;
private _rootKeyFetched = false;
private readonly _retryTimes; // Retry requests N times before erroring by default
#retryTimes; // Retry requests N times before erroring by default
#backoffStrategy: BackoffStrategyFactory;
public readonly _isAgent = true;

// The UTC time in milliseconds when the latest request was made
Expand Down Expand Up @@ -269,7 +274,13 @@ export class HttpAgent implements Agent {
this.#verifyQuerySignatures = options.verifyQuerySignatures;
}
// Default is 3
this._retryTimes = options.retryTimes ?? 3;
this.#retryTimes = options.retryTimes ?? 3;
// Delay strategy for retries. Default is exponential backoff
const defaultBackoffFactory = () =>
new ExponentialBackoff({
maxIterations: this.#retryTimes,
});
this.#backoffStrategy = options.backoffStrategy ?? defaultBackoffFactory;
krpeacock marked this conversation as resolved.
Show resolved Hide resolved
// Rewrite to avoid redirects
if (this._host.hostname.endsWith(IC0_SUB_DOMAIN)) {
this._host.hostname = IC0_DOMAIN;
Expand Down Expand Up @@ -402,13 +413,17 @@ export class HttpAgent implements Agent {

// Run both in parallel. The fetch is quite expensive, so we have plenty of time to
// calculate the requestId locally.
const request = this._requestAndRetry(() =>
this._fetch('' + new URL(`/api/v2/canister/${ecid.toText()}/call`, this._host), {
...this._callOptions,
...transformedRequest.request,
body,
}),
);
const backoff = this.#backoffStrategy();
const request = this.#requestAndRetry({
request: () =>
this._fetch('' + new URL(`/api/v2/canister/${ecid.toText()}/call`, this._host), {
...this._callOptions,
...transformedRequest.request,
body,
}),
backoff,
tries: 0,
});

const [response, requestId] = await Promise.all([request, requestIdOf(submit)]);

Expand All @@ -429,19 +444,42 @@ export class HttpAgent implements Agent {
};
}

async #requestAndRetryQuery(
args: {
ecid: Principal;
transformedRequest: HttpAgentRequest;
body: ArrayBuffer;
requestId: RequestId;
},
tries = 0,
): Promise<ApiQueryResponse> {
const { ecid, transformedRequest, body, requestId } = args;
async #requestAndRetryQuery(args: {
ecid: Principal;
transformedRequest: HttpAgentRequest;
body: ArrayBuffer;
requestId: RequestId;
backoff: BackoffStrategy;
tries: number;
}): Promise<ApiQueryResponse> {
const { ecid, transformedRequest, body, requestId, backoff, tries } = args;

const delay = tries === 0 ? 0 : backoff.next();
this.log(`fetching "/api/v2/canister/${ecid.toString()}/query" with tries:`, {
tries,
backoff,
delay,
});

// If delay is null, the backoff strategy is exhausted due to a maximum number of retries, duration, or other reason
if (delay === null) {
throw new AgentError(
`Timestamp failed to pass the watermark after retrying the configured ${
this.#retryTimes
} times. We cannot guarantee the integrity of the response since it could be a replay attack.`,
);
}

if (delay > 0) {
await new Promise(resolve => setTimeout(resolve, delay));
}
let response: ApiQueryResponse;
// Make the request and retry if it throws an error
try {
this.log(
`fetching "/api/v2/canister/${ecid.toString()}/query" with request:`,
transformedRequest,
);
const fetchResponse = await this._fetch(
'' + new URL(`/api/v2/canister/${ecid.toString()}/query`, this._host),
{
Expand Down Expand Up @@ -476,13 +514,13 @@ export class HttpAgent implements Agent {
);
}
} catch (error) {
if (tries < this._retryTimes) {
if (tries < this.#retryTimes) {
this.log.warn(
`Caught exception while attempting to make query:\n` +
` ${error}\n` +
` Retrying query.`,
);
return await this.#requestAndRetryQuery(args, tries + 1);
return await this.#requestAndRetryQuery({ ...args, tries: tries + 1 });
}
throw error;
}
Expand Down Expand Up @@ -515,31 +553,54 @@ export class HttpAgent implements Agent {
timestamp,
waterMark: this.waterMark,
});
if (tries < this._retryTimes) {
return await this.#requestAndRetryQuery(args, tries + 1);
if (tries < this.#retryTimes) {
return await this.#requestAndRetryQuery({ ...args, tries: tries + 1 });
}
{
throw new AgentError(
`Timestamp failed to pass the watermark after retrying the configured ${this._retryTimes} times. We cannot guarantee the integrity of the response since it could be a replay attack.`,
`Timestamp failed to pass the watermark after retrying the configured ${
this.#retryTimes
} times. We cannot guarantee the integrity of the response since it could be a replay attack.`,
);
}
}

return response;
}

private async _requestAndRetry(request: () => Promise<Response>, tries = 0): Promise<Response> {
async #requestAndRetry(args: {
request: () => Promise<Response>;
backoff: BackoffStrategy;
tries: number;
}): Promise<Response> {
const { request, backoff, tries } = args;
const delay = tries === 0 ? 0 : backoff.next();

// If delay is null, the backoff strategy is exhausted due to a maximum number of retries, duration, or other reason
if (delay === null) {
throw new AgentError(
`Timestamp failed to pass the watermark after retrying the configured ${
this.#retryTimes
} times. We cannot guarantee the integrity of the response since it could be a replay attack.`,
);
}

if (delay > 0) {
await new Promise(resolve => setTimeout(resolve, delay));
}

let response: Response;
try {
response = await request();
} catch (error) {
if (this._retryTimes > tries) {
if (this.#retryTimes > tries) {
this.log.warn(
`Caught exception while attempting to make request:\n` +
` ${error}\n` +
` Retrying request.`,
);
return await this._requestAndRetry(request, tries + 1);
// Delay the request by the configured backoff strategy
return await this.#requestAndRetry({ request, backoff, tries: tries + 1 });
}
throw error;
}
Expand All @@ -553,11 +614,9 @@ export class HttpAgent implements Agent {
` Code: ${response.status} (${response.statusText})\n` +
` Body: ${responseText}\n`;

if (this._retryTimes > tries) {
this.log.warn(errorMessage + ` Retrying request.`);
return await this._requestAndRetry(request, tries + 1);
if (tries < this.#retryTimes) {
return await this.#requestAndRetry({ request, backoff, tries: tries + 1 });
}

throw new AgentHTTPResponseError(errorMessage, {
ok: response.ok,
status: response.status,
Expand All @@ -571,6 +630,7 @@ export class HttpAgent implements Agent {
fields: QueryFields,
identity?: Identity | Promise<Identity>,
): Promise<ApiQueryResponse> {
const backoff = this.#backoffStrategy();
const ecid = fields.effectiveCanisterId
? Principal.from(fields.effectiveCanisterId)
: Principal.from(canisterId);
Expand Down Expand Up @@ -624,6 +684,8 @@ export class HttpAgent implements Agent {
transformedRequest,
body,
requestId,
backoff,
tries: 0,
};

return await this.#requestAndRetryQuery(args);
Expand All @@ -641,7 +703,6 @@ export class HttpAgent implements Agent {
return this.#subnetKeys.get(ecid.toString());
};
// Attempt to make the query i=retryTimes times
// eslint-disable-next-line @typescript-eslint/no-unused-vars
// Make query and fetch subnet keys in parallel
const [query, subnetStatus] = await Promise.all([makeQuery(), getSubnetStatus()]);

Expand Down Expand Up @@ -796,13 +857,21 @@ export class HttpAgent implements Agent {
transformedRequest,
);
// TODO - https://dfinity.atlassian.net/browse/SDK-1092
const response = await this._requestAndRetry(() =>
this._fetch('' + new URL(`/api/v2/canister/${canister}/read_state`, this._host), {
...this._fetchOptions,
...transformedRequest.request,
body,
}),
);
const backoff = this.#backoffStrategy();

const response = await this.#requestAndRetry({
request: () =>
this._fetch(
'' + new URL(`/api/v2/canister/${canister.toString()}/read_state`, this._host),
{
...this._fetchOptions,
...transformedRequest.request,
body,
},
),
backoff,
tries: 0,
});

if (!response.ok) {
throw new Error(
Expand Down Expand Up @@ -887,10 +956,13 @@ export class HttpAgent implements Agent {
: {};

this.log(`fetching "/api/v2/status"`);
const response = await this._requestAndRetry(() =>
this._fetch('' + new URL(`/api/v2/status`, this._host), { headers, ...this._fetchOptions }),
);

const backoff = this.#backoffStrategy();
const response = await this.#requestAndRetry({
backoff,
request: () =>
this._fetch('' + new URL(`/api/v2/status`, this._host), { headers, ...this._fetchOptions }),
tries: 0,
});
return cbor.decode(await response.arrayBuffer());
}

Expand Down
6 changes: 6 additions & 0 deletions packages/agent/src/fetch_candid.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { IDL } from '@dfinity/candid';
import * as cbor from './cbor';

test('simulate fetching a Candid interface', async () => {
jest.useRealTimers();
const mockFetch = jest.fn().mockImplementation((/*resource, init*/) => {
return Promise.resolve(
new Response(
Expand All @@ -23,6 +24,11 @@ test('simulate fetching a Candid interface', async () => {
fetch: mockFetch,
host: 'http://127.0.0.1',
verifyQuerySignatures: false,
backoffStrategy: () => {
return {
next: () => 0,
};
},
});

const candid = await fetchCandid('ryjl3-tyaaa-aaaaa-aaaba-cai', agent);
Expand Down
Loading
Loading