Skip to content

Commit

Permalink
feat: retry delay strategy (#871)
Browse files Browse the repository at this point in the history
* feat: retry delay strategy

* renaming backoffStrategy

* wip

* unit tests passing

* e2e tests aren't finishing

* wip

* fixes to retry strategy (no delay on first try)

* same null backoff strategy for queries as calls, clean up

* more cleanup

* Update packages/agent/src/agent/http/index.ts
  • Loading branch information
krpeacock authored May 1, 2024
1 parent b699f2c commit a38f3d5
Show file tree
Hide file tree
Showing 10 changed files with 290 additions and 50 deletions.
4 changes: 3 additions & 1 deletion docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@
### Added

- chore: adds required `npm audit` check to PRs
- new `HttpAgent` option: `backoffStrategy` - allows you to set a custom delay strategy for retries. The default is a newly exported `exponentialBackoff`, but you can pass your own function to customize the delay between retries.

### Changed

- chore: upgrades github actions to v4
- fix: retry logic now includes delays with exponential backoff matching the dfx strategy. Retries should no longer happen too quickly for the replica to catch up.

## [1.2.1] - 2024-04-25

### Changed
### Added

- feat: make `IdbStorage` `get/set` methods generic
- chore: add context to errors thrown when failing to decode CBOR values.
Expand Down
3 changes: 1 addition & 2 deletions e2e/node/basic/mainnet.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,9 @@ const createWhoamiActor = async (identity: Identity) => {
describe('certified query', () => {
it('should verify a query certificate', async () => {
const actor = await createWhoamiActor(new AnonymousIdentity());

const result = await actor.whoami();
expect(Principal.from(result)).toBeInstanceOf(Principal);
}, 100_000);
}, 10_000);
it('should verify lots of query certificates', async () => {
let count = 1;
const identities = Array.from({ length: 20 }).map(() => {
Expand Down
3 changes: 3 additions & 0 deletions e2e/node/basic/watermark.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ test('replay attack', async () => {
const actor = await createActor({
verifyQuerySignatures: true,
fetch: fetchProxy.fetch.bind(fetchProxy),
backoffStrategy: () => ({
next: () => 0,
}),
});

const agent = Actor.agentOf(actor) as HttpAgent;
Expand Down
2 changes: 1 addition & 1 deletion packages/agent/src/actor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import { CallRequest, SubmitRequestType, UnSigned } from './agent/http/types';
import * as cbor from './cbor';
import { requestIdOf } from './request_id';
import * as pollingImport from './polling';
import { Actor, ActorConfig } from './actor';
import { ActorConfig } from './actor';

const importActor = async (mockUpdatePolling?: () => void) => {
jest.dontMock('./polling');
Expand Down
9 changes: 7 additions & 2 deletions packages/agent/src/agent/http/http.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import { AnonymousIdentity, SignIdentity, toHex } from '../..';
import { Ed25519KeyIdentity } from '@dfinity/identity';
import { AgentError } from '../../errors';
import { AgentHTTPResponseError } from './errors';
import { ExponentialBackoff } from '../../polling/backoff';
const { window } = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`);
window.fetch = global.fetch;
(global as any).window = window;
Expand Down Expand Up @@ -46,7 +47,6 @@ beforeEach(() => {
global.fetch = originalFetch;
});


afterEach(() => {
global.Date.now = originalDateNowFn;
global.window = originalWindow;
Expand Down Expand Up @@ -781,11 +781,16 @@ describe('default host', () => {
});

test('retry requests that fail due to a network failure', async () => {
jest.useRealTimers();
const mockFetch: jest.Mock = jest.fn(() => {
throw new Error('Network failure');
});

const agent = new HttpAgent({ host: HTTP_AGENT_HOST, fetch: mockFetch });
const agent = new HttpAgent({
host: HTTP_AGENT_HOST,
fetch: mockFetch,
});

try {
await agent.call(Principal.managementCanister(), {
methodName: 'test',
Expand Down
160 changes: 116 additions & 44 deletions packages/agent/src/agent/http/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import { ExpirableMap } from '../../utils/expirableMap';
import { Ed25519PublicKey } from '../../public_key';
import { decodeTime } from '../../utils/leb';
import { ObservableLog } from '../../observable';

import { BackoffStrategy, BackoffStrategyFactory, ExponentialBackoff } from '../../polling/backoff';
export * from './transforms';
export { Nonce, makeNonce } from './types';

Expand Down Expand Up @@ -124,6 +124,10 @@ export interface HttpAgentOptions {
* @default 3
*/
retryTimes?: number;
/**
* The strategy to use for backoff when retrying requests
*/
backoffStrategy?: BackoffStrategyFactory;
/**
* Whether the agent should verify signatures signed by node keys on query responses. Increases security, but adds overhead and must make a separate request to cache the node keys for the canister's subnet.
* @default true
Expand Down Expand Up @@ -189,7 +193,8 @@ export class HttpAgent implements Agent {
private readonly _host: URL;
private readonly _credentials: string | undefined;
private _rootKeyFetched = false;
private readonly _retryTimes; // Retry requests N times before erroring by default
#retryTimes; // Retry requests N times before erroring by default
#backoffStrategy: BackoffStrategyFactory;
public readonly _isAgent = true;

// The UTC time in milliseconds when the latest request was made
Expand Down Expand Up @@ -269,7 +274,13 @@ export class HttpAgent implements Agent {
this.#verifyQuerySignatures = options.verifyQuerySignatures;
}
// Default is 3
this._retryTimes = options.retryTimes ?? 3;
this.#retryTimes = options.retryTimes ?? 3;
// Delay strategy for retries. Default is exponential backoff
const defaultBackoffFactory = () =>
new ExponentialBackoff({
maxIterations: this.#retryTimes,
});
this.#backoffStrategy = options.backoffStrategy || defaultBackoffFactory;
// Rewrite to avoid redirects
if (this._host.hostname.endsWith(IC0_SUB_DOMAIN)) {
this._host.hostname = IC0_DOMAIN;
Expand Down Expand Up @@ -402,13 +413,17 @@ export class HttpAgent implements Agent {

// Run both in parallel. The fetch is quite expensive, so we have plenty of time to
// calculate the requestId locally.
const request = this._requestAndRetry(() =>
this._fetch('' + new URL(`/api/v2/canister/${ecid.toText()}/call`, this._host), {
...this._callOptions,
...transformedRequest.request,
body,
}),
);
const backoff = this.#backoffStrategy();
const request = this.#requestAndRetry({
request: () =>
this._fetch('' + new URL(`/api/v2/canister/${ecid.toText()}/call`, this._host), {
...this._callOptions,
...transformedRequest.request,
body,
}),
backoff,
tries: 0,
});

const [response, requestId] = await Promise.all([request, requestIdOf(submit)]);

Expand All @@ -429,19 +444,42 @@ export class HttpAgent implements Agent {
};
}

async #requestAndRetryQuery(
args: {
ecid: Principal;
transformedRequest: HttpAgentRequest;
body: ArrayBuffer;
requestId: RequestId;
},
tries = 0,
): Promise<ApiQueryResponse> {
const { ecid, transformedRequest, body, requestId } = args;
async #requestAndRetryQuery(args: {
ecid: Principal;
transformedRequest: HttpAgentRequest;
body: ArrayBuffer;
requestId: RequestId;
backoff: BackoffStrategy;
tries: number;
}): Promise<ApiQueryResponse> {
const { ecid, transformedRequest, body, requestId, backoff, tries } = args;

const delay = tries === 0 ? 0 : backoff.next();
this.log(`fetching "/api/v2/canister/${ecid.toString()}/query" with tries:`, {
tries,
backoff,
delay,
});

// If delay is null, the backoff strategy is exhausted due to a maximum number of retries, duration, or other reason
if (delay === null) {
throw new AgentError(
`Timestamp failed to pass the watermark after retrying the configured ${
this.#retryTimes
} times. We cannot guarantee the integrity of the response since it could be a replay attack.`,
);
}

if (delay > 0) {
await new Promise(resolve => setTimeout(resolve, delay));
}
let response: ApiQueryResponse;
// Make the request and retry if it throws an error
try {
this.log(
`fetching "/api/v2/canister/${ecid.toString()}/query" with request:`,
transformedRequest,
);
const fetchResponse = await this._fetch(
'' + new URL(`/api/v2/canister/${ecid.toString()}/query`, this._host),
{
Expand Down Expand Up @@ -476,13 +514,13 @@ export class HttpAgent implements Agent {
);
}
} catch (error) {
if (tries < this._retryTimes) {
if (tries < this.#retryTimes) {
this.log.warn(
`Caught exception while attempting to make query:\n` +
` ${error}\n` +
` Retrying query.`,
);
return await this.#requestAndRetryQuery(args, tries + 1);
return await this.#requestAndRetryQuery({ ...args, tries: tries + 1 });
}
throw error;
}
Expand Down Expand Up @@ -515,31 +553,54 @@ export class HttpAgent implements Agent {
timestamp,
waterMark: this.waterMark,
});
if (tries < this._retryTimes) {
return await this.#requestAndRetryQuery(args, tries + 1);
if (tries < this.#retryTimes) {
return await this.#requestAndRetryQuery({ ...args, tries: tries + 1 });
}
{
throw new AgentError(
`Timestamp failed to pass the watermark after retrying the configured ${this._retryTimes} times. We cannot guarantee the integrity of the response since it could be a replay attack.`,
`Timestamp failed to pass the watermark after retrying the configured ${
this.#retryTimes
} times. We cannot guarantee the integrity of the response since it could be a replay attack.`,
);
}
}

return response;
}

private async _requestAndRetry(request: () => Promise<Response>, tries = 0): Promise<Response> {
async #requestAndRetry(args: {
request: () => Promise<Response>;
backoff: BackoffStrategy;
tries: number;
}): Promise<Response> {
const { request, backoff, tries } = args;
const delay = tries === 0 ? 0 : backoff.next();

// If delay is null, the backoff strategy is exhausted due to a maximum number of retries, duration, or other reason
if (delay === null) {
throw new AgentError(
`Timestamp failed to pass the watermark after retrying the configured ${
this.#retryTimes
} times. We cannot guarantee the integrity of the response since it could be a replay attack.`,
);
}

if (delay > 0) {
await new Promise(resolve => setTimeout(resolve, delay));
}

let response: Response;
try {
response = await request();
} catch (error) {
if (this._retryTimes > tries) {
if (this.#retryTimes > tries) {
this.log.warn(
`Caught exception while attempting to make request:\n` +
` ${error}\n` +
` Retrying request.`,
);
return await this._requestAndRetry(request, tries + 1);
// Delay the request by the configured backoff strategy
return await this.#requestAndRetry({ request, backoff, tries: tries + 1 });
}
throw error;
}
Expand All @@ -553,11 +614,9 @@ export class HttpAgent implements Agent {
` Code: ${response.status} (${response.statusText})\n` +
` Body: ${responseText}\n`;

if (this._retryTimes > tries) {
this.log.warn(errorMessage + ` Retrying request.`);
return await this._requestAndRetry(request, tries + 1);
if (tries < this.#retryTimes) {
return await this.#requestAndRetry({ request, backoff, tries: tries + 1 });
}

throw new AgentHTTPResponseError(errorMessage, {
ok: response.ok,
status: response.status,
Expand All @@ -571,6 +630,7 @@ export class HttpAgent implements Agent {
fields: QueryFields,
identity?: Identity | Promise<Identity>,
): Promise<ApiQueryResponse> {
const backoff = this.#backoffStrategy();
const ecid = fields.effectiveCanisterId
? Principal.from(fields.effectiveCanisterId)
: Principal.from(canisterId);
Expand Down Expand Up @@ -624,6 +684,8 @@ export class HttpAgent implements Agent {
transformedRequest,
body,
requestId,
backoff,
tries: 0,
};

return await this.#requestAndRetryQuery(args);
Expand All @@ -641,7 +703,6 @@ export class HttpAgent implements Agent {
return this.#subnetKeys.get(ecid.toString());
};
// Attempt to make the query i=retryTimes times
// eslint-disable-next-line @typescript-eslint/no-unused-vars
// Make query and fetch subnet keys in parallel
const [query, subnetStatus] = await Promise.all([makeQuery(), getSubnetStatus()]);

Expand Down Expand Up @@ -796,13 +857,21 @@ export class HttpAgent implements Agent {
transformedRequest,
);
// TODO - https://dfinity.atlassian.net/browse/SDK-1092
const response = await this._requestAndRetry(() =>
this._fetch('' + new URL(`/api/v2/canister/${canister}/read_state`, this._host), {
...this._fetchOptions,
...transformedRequest.request,
body,
}),
);
const backoff = this.#backoffStrategy();

const response = await this.#requestAndRetry({
request: () =>
this._fetch(
'' + new URL(`/api/v2/canister/${canister.toString()}/read_state`, this._host),
{
...this._fetchOptions,
...transformedRequest.request,
body,
},
),
backoff,
tries: 0,
});

if (!response.ok) {
throw new Error(
Expand Down Expand Up @@ -887,10 +956,13 @@ export class HttpAgent implements Agent {
: {};

this.log(`fetching "/api/v2/status"`);
const response = await this._requestAndRetry(() =>
this._fetch('' + new URL(`/api/v2/status`, this._host), { headers, ...this._fetchOptions }),
);

const backoff = this.#backoffStrategy();
const response = await this.#requestAndRetry({
backoff,
request: () =>
this._fetch('' + new URL(`/api/v2/status`, this._host), { headers, ...this._fetchOptions }),
tries: 0,
});
return cbor.decode(await response.arrayBuffer());
}

Expand Down
6 changes: 6 additions & 0 deletions packages/agent/src/fetch_candid.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { IDL } from '@dfinity/candid';
import * as cbor from './cbor';

test('simulate fetching a Candid interface', async () => {
jest.useRealTimers();
const mockFetch = jest.fn().mockImplementation((/*resource, init*/) => {
return Promise.resolve(
new Response(
Expand All @@ -23,6 +24,11 @@ test('simulate fetching a Candid interface', async () => {
fetch: mockFetch,
host: 'http://127.0.0.1',
verifyQuerySignatures: false,
backoffStrategy: () => {
return {
next: () => 0,
};
},
});

const candid = await fetchCandid('ryjl3-tyaaa-aaaaa-aaaba-cai', agent);
Expand Down
Loading

0 comments on commit a38f3d5

Please sign in to comment.