
Fix/multi endpint delay #2582
Merged — 9 commits, merged Nov 12, 2024
Changes from 3 commits
3 changes: 3 additions & 0 deletions packages/node-core/CHANGELOG.md
@@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed
- Block fetching delays when multiple endpoints are configured and network conditions are poor (#2572)

## [14.1.7] - 2024-10-30
### Changed
- Bump `@subql/common` dependency
28 changes: 27 additions & 1 deletion packages/node-core/src/indexer/connectionPool.service.spec.ts
@@ -2,7 +2,7 @@
// SPDX-License-Identifier: GPL-3.0

import {delay} from '@subql/common';
import {ApiErrorType, ConnectionPoolStateManager, IApiConnectionSpecific, NodeConfig} from '..';
import {ApiErrorType, ConnectionPoolStateManager, getLogger, IApiConnectionSpecific, NodeConfig} from '..';
import {ConnectionPoolService} from './connectionPool.service';

async function waitFor(conditionFn: () => boolean, timeout = 50000, interval = 100): Promise<void> {
@@ -127,4 +127,30 @@ describe('ConnectionPoolService', () => {
expect(handleApiDisconnectsSpy).toHaveBeenCalledTimes(1);
}, 15000);
});

describe('Rate limit endpoint delay 20s', () => {
it('call delay', async () => {
const logger = getLogger('connection-pool');
const consoleSpy = jest.spyOn(logger, 'info');

await connectionPoolService.addToConnections(mockApiConnection, TEST_URL);
await connectionPoolService.addToConnections(mockApiConnection, `${TEST_URL}/2`);
await connectionPoolService.handleApiError(TEST_URL, {
name: 'timeout',
errorType: ApiErrorType.Timeout,
message: 'timeout error',
});
await connectionPoolService.handleApiError(`${TEST_URL}/2`, {
name: 'DefaultError',
errorType: ApiErrorType.Default,
message: 'Default error',
});
await (connectionPoolService as any).flushResultCache();

await connectionPoolService.api.fetchBlocks([34365]);

expect(consoleSpy).toHaveBeenCalledWith('throtling on ratelimited endpoint 20s');
consoleSpy.mockRestore();
}, 30000);
});
});
6 changes: 3 additions & 3 deletions packages/node-core/src/indexer/connectionPool.service.ts
@@ -104,9 +104,9 @@ export class ConnectionPoolService<T extends IApiConnectionSpecific<any, any, an
try {
// Check if the endpoint is rate-limited
Collaborator: This is still necessary or there will be no throttling

if (await this.poolStateManager.getFieldValue(endpoint, 'rateLimited')) {
logger.info('throtling on ratelimited endpoint');
const backoffDelay = await this.poolStateManager.getFieldValue(endpoint, 'backoffDelay');
await delay(backoffDelay / 1000);
const rateLimitDelay = await this.poolStateManager.getFieldValue(endpoint, 'rateLimitDelay');
logger.info(`throtling on ratelimited endpoint ${rateLimitDelay / 1000}s`);
await delay(rateLimitDelay / 1000);
Collaborator: backoffDelay is no longer used anywhere

Collaborator (author): The backoffDelay parameter is mainly used when setting setRecoverTimeout, and it is also reported by the interval function that logs the current endpoint status (logEndpointStatus).

}

const start = Date.now();
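To make the distinction discussed in the two review threads above concrete, here is a minimal, self-contained sketch of the two delay mechanisms: a small fixed per-request throttle (rateLimitDelay) applied while an endpoint is flagged as rate-limited, and an exponential backoffDelay that only schedules the recovery timer (cf. setRecoverTimeout). The type and function names, and the seconds-based delay helper, are illustrative assumptions and not the actual @subql/node-core implementation; the constants mirror the ones added in this PR.

```ts
// Illustrative sketch only — names and structure are assumptions, not the real @subql/node-core API.
type PoolItem = {
  rateLimited: boolean;
  rateLimitDelay: number; // ms; small fixed throttle applied to every request while rate-limited
  backoffDelay: number; // ms; exponential delay, only used to schedule recovery
  failureCount: number;
};

const RETRY_DELAY = 60 * 1000;
const MAX_RETRY_DELAY = 60 * RETRY_DELAY;
const RATE_LIMIT_DELAY = 20 * 1000;

// Stand-in for the delay helper, which the diff suggests takes seconds (hence the `/ 1000`).
const delay = (seconds: number) => new Promise<void>((resolve) => setTimeout(resolve, seconds * 1000));

// Exponential backoff: start at RETRY_DELAY, double per failure, cap at MAX_RETRY_DELAY.
function nextBackoff(item: PoolItem): number {
  return Math.min(RETRY_DELAY * Math.pow(2, item.failureCount - 1), MAX_RETRY_DELAY);
}

// Mark an endpoint rate-limited: the fixed throttle is applied per request,
// while the exponential backoff only drives the recovery timer.
function recordRateLimit(item: PoolItem): void {
  item.failureCount += 1;
  item.rateLimited = true;
  item.rateLimitDelay = RATE_LIMIT_DELAY;
  item.backoffDelay = nextBackoff(item);
  setTimeout(() => {
    item.rateLimited = false;
    item.rateLimitDelay = 0;
    item.backoffDelay = 0;
  }, item.backoffDelay);
}

// Per-request throttle: without this wait there is no throttling at all, because a
// rate-limited endpoint can still be selected when no healthy endpoint is available.
async function throttledRequest<T>(item: PoolItem, request: () => Promise<T>): Promise<T> {
  if (item.rateLimited) {
    console.info(`throttling on rate-limited endpoint ${item.rateLimitDelay / 1000}s`);
    await delay(item.rateLimitDelay / 1000);
  }
  return request();
}
```

Under this scheme a rate-limited endpoint keeps serving blocks after a short fixed wait (20 s here), while the much larger exponential backoff only decides when its flags are cleared — which is the behaviour the updated test above asserts.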
87 changes: 40 additions & 47 deletions packages/node-core/src/indexer/connectionPoolState.manager.spec.ts
@@ -1,7 +1,8 @@
// Copyright 2020-2024 SubQuery Pte Ltd authors & contributors
// SPDX-License-Identifier: GPL-3.0

import {ConnectionPoolStateManager} from './connectionPoolState.manager';
import {ApiErrorType} from '../api.connection.error';
import {ConnectionPoolItem, ConnectionPoolStateManager} from './connectionPoolState.manager';

describe('ConnectionPoolStateManager', function () {
let connectionPoolStateManager: ConnectionPoolStateManager<any>;
@@ -12,62 +13,54 @@ describe('ConnectionPoolStateManager', function () {
connectionPoolStateManager = new ConnectionPoolStateManager();
});

afterEach(async function () {
await connectionPoolStateManager.onApplicationShutdown();
});

it('chooses primary endpoint first', async function () {
(connectionPoolStateManager as any).pool[EXAMPLE_ENDPOINT1] = {
primary: true,
performanceScore: 100,
failureCount: 0,
endpoint: '',
backoffDelay: 0,
rateLimited: false,
failed: false,
connected: true,
lastRequestTime: 0,
};

(connectionPoolStateManager as any).pool[EXAMPLE_ENDPOINT2] = {
primary: false,
performanceScore: 100,
failureCount: 0,
endpoint: '',
backoffDelay: 0,
rateLimited: false,
failed: false,
connected: true,
lastRequestTime: 0,
};
await connectionPoolStateManager.addToConnections(EXAMPLE_ENDPOINT1, true);
await connectionPoolStateManager.addToConnections(EXAMPLE_ENDPOINT2, false);

expect(await connectionPoolStateManager.getNextConnectedEndpoint()).toEqual(EXAMPLE_ENDPOINT1);
});

it('does not choose primary endpoint if failed', async function () {
(connectionPoolStateManager as any).pool[EXAMPLE_ENDPOINT1] = {
primary: true,
performanceScore: 100,
failureCount: 0,
endpoint: '',
backoffDelay: 0,
rateLimited: false,
failed: false,
connected: false,
lastRequestTime: 0,
};

(connectionPoolStateManager as any).pool[EXAMPLE_ENDPOINT2] = {
primary: false,
performanceScore: 100,
failureCount: 0,
endpoint: '',
backoffDelay: 0,
rateLimited: false,
failed: false,
connected: true,
lastRequestTime: 0,
};
await connectionPoolStateManager.addToConnections(EXAMPLE_ENDPOINT1, true);
await connectionPoolStateManager.addToConnections(EXAMPLE_ENDPOINT2, false);

await connectionPoolStateManager.handleApiError(EXAMPLE_ENDPOINT1, ApiErrorType.Default);

expect(await connectionPoolStateManager.getNextConnectedEndpoint()).toEqual(EXAMPLE_ENDPOINT2);
});

it('All endpoints backoff; select a rateLimited endpoint. reason: ApiErrorType.Timeout', async function () {
await connectionPoolStateManager.addToConnections(EXAMPLE_ENDPOINT1, false);
await connectionPoolStateManager.addToConnections(EXAMPLE_ENDPOINT2, false);

await connectionPoolStateManager.handleApiError(EXAMPLE_ENDPOINT1, ApiErrorType.Default);
await connectionPoolStateManager.handleApiError(EXAMPLE_ENDPOINT2, ApiErrorType.Timeout);

const nextEndpoint = await connectionPoolStateManager.getNextConnectedEndpoint();
const endpointInfo = (connectionPoolStateManager as any).pool[EXAMPLE_ENDPOINT2] as ConnectionPoolItem<any>;
expect(nextEndpoint).toEqual(EXAMPLE_ENDPOINT2);
expect(endpointInfo.rateLimited).toBe(true);
expect((connectionPoolStateManager as any).pool[EXAMPLE_ENDPOINT2].rateLimitDelay).toBe(20 * 1000);
});

it('All endpoints backoff; select a rateLimited endpoint. reason: ApiErrorType.RateLimit', async function () {
await connectionPoolStateManager.addToConnections(EXAMPLE_ENDPOINT1, false);
await connectionPoolStateManager.addToConnections(EXAMPLE_ENDPOINT2, false);

await connectionPoolStateManager.handleApiError(EXAMPLE_ENDPOINT1, ApiErrorType.Default);
await connectionPoolStateManager.handleApiError(EXAMPLE_ENDPOINT2, ApiErrorType.RateLimit);

const nextEndpoint = await connectionPoolStateManager.getNextConnectedEndpoint();
const endpointInfo = (connectionPoolStateManager as any).pool[EXAMPLE_ENDPOINT2] as ConnectionPoolItem<any>;
expect(nextEndpoint).toEqual(EXAMPLE_ENDPOINT2);
expect(endpointInfo.rateLimited).toBe(true);
expect((connectionPoolStateManager as any).pool[EXAMPLE_ENDPOINT2].rateLimitDelay).toBe(20 * 1000);
});

it('can calculate performance score for response time of zero', function () {
const score = (connectionPoolStateManager as any).calculatePerformanceScore(0, 0);
expect(score).not.toBeNaN();
52 changes: 32 additions & 20 deletions packages/node-core/src/indexer/connectionPoolState.manager.ts
@@ -11,9 +11,11 @@ import {exitWithError} from '../process';
import {errorTypeToScoreAdjustment} from './connectionPool.service';

const RETRY_DELAY = 60 * 1000;
const MAX_RETRY_DELAY = 60 * RETRY_DELAY;
const MAX_FAILURES = 5;
const RESPONSE_TIME_WEIGHT = 0.7;
const FAILURE_WEIGHT = 0.3;
const RATE_LIMIT_DELAY = 20 * 1000;

export interface ConnectionPoolItem<T> {
endpoint: string;
@@ -22,6 +24,7 @@
backoffDelay: number;
failureCount: number;
rateLimited: boolean;
rateLimitDelay: number;
failed: boolean;
lastRequestTime: number;
connected: boolean;
@@ -72,6 +75,7 @@ export class ConnectionPoolStateManager<T extends IApiConnectionSpecific<any, an
endpoint,
backoffDelay: 0,
rateLimited: false,
rateLimitDelay: 0,
failed: false,
connected: true,
lastRequestTime: 0,
@@ -191,13 +195,14 @@ export class ConnectionPoolStateManager<T extends IApiConnectionSpecific<any, an
}

//eslint-disable-next-line @typescript-eslint/require-await
async setTimeout(endpoint: string, delay: number): Promise<void> {
async setRecoverTimeout(endpoint: string, delay: number): Promise<void> {
// Make sure there is no existing timeout
await this.clearTimeout(endpoint);

this.pool[endpoint].timeoutId = setTimeout(() => {
this.pool[endpoint].backoffDelay = 0; // Reset backoff delay only if there are no consecutive errors
this.pool[endpoint].rateLimited = false;
this.pool[endpoint].rateLimitDelay = 0;
this.pool[endpoint].failed = false;
this.pool[endpoint].timeoutId = undefined; // Clear the timeout ID

@@ -247,35 +252,42 @@ export class ConnectionPoolStateManager<T extends IApiConnectionSpecific<any, an
switch (errorType) {
case ApiErrorType.Connection: {
if (this.pool[endpoint].connected) {
//handleApiDisconnects was already called if this is false
//this.handleApiDisconnects(endpoint);
// The connected status does not provide service. handleApiDisconnects() will be called to handle this.
this.pool[endpoint].connected = false;
}
return;
}
case ApiErrorType.Timeout:
case ApiErrorType.RateLimit: {
// The “rateLimited” status will be selected when no endpoints are available, so we should avoid setting a large delay.
this.pool[endpoint].rateLimited = true;
this.pool[endpoint].rateLimitDelay = RATE_LIMIT_DELAY;
break;
}
case ApiErrorType.Default: {
const nextDelay = RETRY_DELAY * Math.pow(2, this.pool[endpoint].failureCount - 1); // Exponential backoff using failure count // Start with RETRY_DELAY and double on each failure
this.pool[endpoint].backoffDelay = nextDelay;

if (ApiErrorType.Timeout || ApiErrorType.RateLimit) {
this.pool[endpoint].rateLimited = true;
} else {
this.pool[endpoint].failed = true;
}

await this.setTimeout(endpoint, nextDelay);

logger.warn(
`Endpoint ${this.pool[endpoint].endpoint} experienced an error (${errorType}). Suspending for ${
nextDelay / 1000
}s.`
);
return;
// The “failed” status does not provide service.
this.pool[endpoint].failed = true;
break;
}
default: {
throw new Error(`Unknown error type ${errorType}`);
}
}

const nextDelay = this.calculateNextDelay(this.pool[endpoint]);
this.pool[endpoint].backoffDelay = nextDelay;
await this.setRecoverTimeout(endpoint, nextDelay);

logger.warn(
`Endpoint ${this.pool[endpoint].endpoint} experienced an error (${errorType}). Suspending for ${
nextDelay / 1000
}s.`
);
}

private calculateNextDelay(poolItem: ConnectionPoolItem<T>): number {
// Exponential backoff using failure count, Start with RETRY_DELAY and double on each failure, MAX_RETRY_DELAY is the maximum delay
return Math.min(RETRY_DELAY * Math.pow(2, poolItem.failureCount - 1), MAX_RETRY_DELAY);
}
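As a quick, stand-alone check of the capped exponential backoff implemented by calculateNextDelay above, using the RETRY_DELAY and MAX_RETRY_DELAY constants added in this diff (the loop itself is illustrative and not part of the PR):

```ts
// Delay progression produced by the capped exponential backoff in this diff:
// failureCount 1 -> 60s, 2 -> 120s, 3 -> 240s, 4 -> 480s, 5 -> 960s, 6 -> 1920s, 7+ -> 3600s (cap).
const RETRY_DELAY = 60 * 1000;
const MAX_RETRY_DELAY = 60 * RETRY_DELAY;
for (let failureCount = 1; failureCount <= 8; failureCount++) {
  const next = Math.min(RETRY_DELAY * Math.pow(2, failureCount - 1), MAX_RETRY_DELAY);
  console.log(`failureCount=${failureCount} -> ${next / 1000}s`);
}
```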

private calculatePerformanceScore(responseTime: number, failureCount: number): number {