diff --git a/src/discovery/Discovery.ts b/src/discovery/Discovery.ts index 050bade762..f28bb80506 100644 --- a/src/discovery/Discovery.ts +++ b/src/discovery/Discovery.ts @@ -624,18 +624,28 @@ class Discovery { identityId, ctx, ); + let lastProviderPaginationToken = await this.gestaltGraph + .getIdentity(providerIdentityId) + .then((identity) => identity?.lastProviderPaginationToken); // If we don't have identity info, simply skip this vertex if (vertexIdentityInfo == null) { return; } // Getting and verifying claims - const claims = await this.verifyIdentityClaims(providerId, identityId, ctx); - // The verifyIdentityClaims takes too long, causing the ctx.signal to abort + const { + identityClaims, + lastProviderPaginationToken: lastProviderPaginationToken_, + } = await this.verifyIdentityClaims( + providerId, + identityId, + lastProviderPaginationToken, + ctx, + ); + lastProviderPaginationToken = lastProviderPaginationToken_; + const isAborted = ctx.signal.aborted; // Link the identity with each node from its claims on the provider - // Iterate over each of the claims - for (const [claimId, claim] of Object.entries(claims)) { - // Since ctx.signal.aborted haqs already aborted, no claims ever get processed - if (ctx.signal.aborted) throw ctx.signal.reason; + // Iterate over each of the claims, even if ctx has aborted + for (const [claimId, claim] of Object.entries(identityClaims)) { // Claims on an identity provider will always be node -> identity // So just cast payload data as such const linkedVertexNodeId = nodesUtils.decodeNodeId(claim.payload.iss); @@ -646,7 +656,10 @@ class Discovery { }; await this.gestaltGraph.linkNodeAndIdentity( linkedVertexNodeInfo, - vertexIdentityInfo, + { + ...vertexIdentityInfo, + lastProviderPaginationToken, + }, { claim: claim, meta: { @@ -671,6 +684,11 @@ class Discovery { ); } } + // Throw after we have processed the node claims if the signal aborted whilst running verifyIdentityClaims + if (isAborted) { + throw ctx.signal.reason; + } + // Only setVertexProcessedTime if we have succeeded in processing all identities await this.gestaltGraph.setVertexProcessedTime( ['identity', providerIdentityId], Date.now(), @@ -838,22 +856,33 @@ class Discovery { * Helper function to retrieve and verify the claims of an identity on a given * provider. Connects with each node the identity claims to be linked with, * and verifies the claim with the public key of the node. + * + * This method never throws if ctx has aborted, opting instead to return early + * with a lastProviderPaginationToken so that the caller can process the partially + * requested Claims as well as resume progress when calling again. */ protected async verifyIdentityClaims( providerId: ProviderId, identityId: IdentityId, + providerPaginationToken: ProviderPaginationToken | undefined, ctx: ContextTimed, - ): Promise>> { + ): Promise<{ + identityClaims: Record< + ProviderIdentityClaimId, + SignedClaim + >; + lastProviderPaginationToken?: ProviderPaginationToken; + }> { const provider = this.identitiesManager.getProvider(providerId); // If we don't have this provider, no identity info to find if (provider == null) { - return {}; + return { identityClaims: {} }; } // Get our own auth identity id const authIdentityIds = await provider.getAuthIdentityIds(); // If we don't have one then we can't request data so just skip if (authIdentityIds.length === 0 || authIdentityIds[0] == null) { - return {}; + return { identityClaims: {} }; } const authIdentityId = authIdentityIds[0]; const identityClaims: Record< @@ -861,7 +890,6 @@ class Discovery { SignedClaim > = {}; - let nextPaginationToken: ProviderPaginationToken | undefined; const identitySignedClaimDb = ( identitySignedClaim: IdentitySignedClaim, ) => { @@ -877,6 +905,8 @@ class Discovery { identityClaims[identitySignedClaim.id] = claim; } }; + let nextPaginationToken: ProviderPaginationToken | undefined = + providerPaginationToken; while (true) { // Refresh before each request made with identitySignedClaimGenerator ctx.timer.refresh(); @@ -892,7 +922,10 @@ class Discovery { // 1. throw if the getClaimIdsPage takes too much time // 2. the rest of this loop iteration takes too much time if (ctx.signal.aborted) { - throw ctx.signal.reason; + return { + identityClaims: identityClaims, + lastProviderPaginationToken: nextPaginationToken, + }; } const claimId = wrapper.claimId; nextPaginationToken = wrapper.nextPaginationToken; @@ -920,7 +953,10 @@ class Discovery { // 1. throw if the getClaimIdsPage takes too much time // 2. the rest of this loop iteration takes too much time if (ctx.signal.aborted) { - throw ctx.signal.reason; + return { + identityClaims: identityClaims, + lastProviderPaginationToken: nextPaginationToken, + }; } nextPaginationToken = wrapper.nextPaginationToken; // Claims on an identity provider will always be node -> identity @@ -931,7 +967,9 @@ class Discovery { break; } } - return identityClaims; + return { + identityClaims, + }; } /** diff --git a/src/gestalts/types.ts b/src/gestalts/types.ts index 294b6e30ce..1cfb039f24 100644 --- a/src/gestalts/types.ts +++ b/src/gestalts/types.ts @@ -9,6 +9,7 @@ import type { } from '../ids/types'; import type { SignedClaim, SignedClaimJSON } from '../claims/types'; import type { ClaimLinkIdentity, ClaimLinkNode } from '../claims/payloads'; +import type { ProviderPaginationToken } from '../identities/types'; const gestaltActions = ['notify', 'scan', 'claim'] as const; @@ -41,6 +42,11 @@ type GestaltIdentityInfo = { name?: string; email?: string; url?: string; + /** + * Used to determine the last pagination token to resume + * from when discovering the identity vertex + */ + lastProviderPaginationToken?: ProviderPaginationToken; // The `undefined` is a hack to include the optional reserved properties [key: string]: JSONValue | undefined; };