From b2877243bd8a724a651650debeacc7817d9585bb Mon Sep 17 00:00:00 2001 From: Ronit Agarwala Date: Tue, 21 Nov 2023 11:19:27 -0500 Subject: [PATCH 1/6] Add batch indexing Helps reduce single request load and avoids timeout errors from JS client --- data.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/data.ts b/data.ts index a1d2f8d..ff8fee2 100644 --- a/data.ts +++ b/data.ts @@ -44,6 +44,11 @@ export async function populate(path: string, opts: ClientOptions) { const data = await getData(path) if (data) { const client = new Client(opts) - await client.bulk({ body: data }) + const batch_size = 10 + for (let i = 0; i < data.length; i += batch_size) { + const batch = data.slice(i, i + batch_size) + await client.bulk({ body: batch }) + console.log(`Indexed ${batch.length + i} records`) + } } } From fd281347ae53833838877e3d56cb99a70956e054 Mon Sep 17 00:00:00 2001 From: Ronit Agarwala Date: Tue, 21 Nov 2023 13:07:15 -0500 Subject: [PATCH 2/6] Change batch index loop -Switch to Array.forEach method for faster indexing -Use lodash chunk method for batching --- data.ts | 8 ++++---- package-lock.json | 6 ++++++ package.json | 1 + 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/data.ts b/data.ts index ff8fee2..cf60cd2 100644 --- a/data.ts +++ b/data.ts @@ -12,6 +12,7 @@ import { pathToFileURL } from 'url' import { Client } from '@opensearch-project/opensearch' import type { ClientOptions } from '@opensearch-project/opensearch' import { exists } from './paths' +import _ from 'lodash' const jsonFilename = 'sandbox-search.json' const jsFilename = 'sandbox-search.js' @@ -45,10 +46,9 @@ export async function populate(path: string, opts: ClientOptions) { if (data) { const client = new Client(opts) const batch_size = 10 - for (let i = 0; i < data.length; i += batch_size) { - const batch = data.slice(i, i + batch_size) + const batches = _.chunk(data, batch_size) + batches.forEach(async (batch: object[]) => { await client.bulk({ body: batch }) - console.log(`Indexed ${batch.length + i} records`) - } + }) } } diff --git a/package-lock.json b/package-lock.json index bc0ed54..b02e4ec 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "dependencies": { "@opensearch-project/opensearch": "^2.2.0", "env-paths": "^3.0.0", + "lodash": "^4.17.21", "make-fetch-happen": "^11.0.3", "rimraf": "^4.1.2", "tar": "^6.1.13", @@ -2798,6 +2799,11 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" + }, "node_modules/lodash.merge": { "version": "4.6.2", "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", diff --git a/package.json b/package.json index bc7b329..209eaa0 100644 --- a/package.json +++ b/package.json @@ -27,6 +27,7 @@ "dependencies": { "@opensearch-project/opensearch": "^2.2.0", "env-paths": "^3.0.0", + "lodash": "^4.17.21", "make-fetch-happen": "^11.0.3", "rimraf": "^4.1.2", "tar": "^6.1.13", From 3ee6af81128b4d6d6953d9bf698f4f5e3b616a34 Mon Sep 17 00:00:00 2001 From: ronitagarwala01 <34790361+ronitagarwala01@users.noreply.github.com> Date: Tue, 21 Nov 2023 14:44:10 -0500 Subject: [PATCH 3/6] Update data.ts Import only chunk from lodash Co-authored-by: Leo Singer --- data.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data.ts b/data.ts index cf60cd2..215e4ea 100644 --- a/data.ts +++ b/data.ts @@ -12,7 +12,7 @@ import { pathToFileURL } from 'url' import { Client } from '@opensearch-project/opensearch' import type { ClientOptions } from '@opensearch-project/opensearch' import { exists } from './paths' -import _ from 'lodash' +import chunk from 'lodash/chunk' const jsonFilename = 'sandbox-search.json' const jsFilename = 'sandbox-search.js' From 88c59696e576a922419c71d913eb2e198e6641e8 Mon Sep 17 00:00:00 2001 From: Ronit Agarwala Date: Tue, 21 Nov 2023 14:49:02 -0500 Subject: [PATCH 4/6] Change chunk call in data.ts --- data.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data.ts b/data.ts index 215e4ea..d6324e6 100644 --- a/data.ts +++ b/data.ts @@ -46,7 +46,7 @@ export async function populate(path: string, opts: ClientOptions) { if (data) { const client = new Client(opts) const batch_size = 10 - const batches = _.chunk(data, batch_size) + const batches = chunk(data, batch_size) batches.forEach(async (batch: object[]) => { await client.bulk({ body: batch }) }) From 773cfe3a1461518a41471db7f291b2066540b425 Mon Sep 17 00:00:00 2001 From: Leo Singer Date: Tue, 21 Nov 2023 14:55:17 -0500 Subject: [PATCH 5/6] Fix types for batches --- data.ts | 4 ++-- package-lock.json | 7 +++++++ package.json | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/data.ts b/data.ts index d6324e6..7f439a4 100644 --- a/data.ts +++ b/data.ts @@ -17,7 +17,7 @@ import chunk from 'lodash/chunk' const jsonFilename = 'sandbox-search.json' const jsFilename = 'sandbox-search.js' -async function getData(path: string) { +async function getData(path: string): Promise { let result const jsonPath = join(path, jsonFilename) const jsPath = join(path, jsFilename) @@ -47,7 +47,7 @@ export async function populate(path: string, opts: ClientOptions) { const client = new Client(opts) const batch_size = 10 const batches = chunk(data, batch_size) - batches.forEach(async (batch: object[]) => { + batches.forEach(async (batch) => { await client.bulk({ body: batch }) }) } diff --git a/package-lock.json b/package-lock.json index b02e4ec..91d575b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -21,6 +21,7 @@ "devDependencies": { "@nasa-gcn/eslint-config-gitignore": "^0.0.1", "@tsconfig/node14": "^1.0.3", + "@types/lodash": "^4.14.202", "@types/make-fetch-happen": "^10.0.1", "@types/node": "^18.13.0", "@types/tar": "^6.1.4", @@ -545,6 +546,12 @@ "integrity": "sha512-wOuvG1SN4Us4rez+tylwwwCV1psiNVOkJeM3AUWUNWg/jDQY2+HE/444y5gc+jBmRqASOm2Oeh5c1axHobwRKQ==", "dev": true }, + "node_modules/@types/lodash": { + "version": "4.14.202", + "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.14.202.tgz", + "integrity": "sha512-OvlIYQK9tNneDlS0VN54LLd5uiPCBOp7gS5Z0f1mjoJYBrtStzgmJBxONW3U6OZqdtNzZPmn9BS/7WI7BFFcFQ==", + "dev": true + }, "node_modules/@types/make-fetch-happen": { "version": "10.0.1", "resolved": "https://registry.npmjs.org/@types/make-fetch-happen/-/make-fetch-happen-10.0.1.tgz", diff --git a/package.json b/package.json index 209eaa0..f1ba9cf 100644 --- a/package.json +++ b/package.json @@ -37,6 +37,7 @@ "devDependencies": { "@nasa-gcn/eslint-config-gitignore": "^0.0.1", "@tsconfig/node14": "^1.0.3", + "@types/lodash": "^4.14.202", "@types/make-fetch-happen": "^10.0.1", "@types/node": "^18.13.0", "@types/tar": "^6.1.4", From fe598f71049c5898ab0a3e289651dddc13299a25 Mon Sep 17 00:00:00 2001 From: Ronit Agarwala Date: Wed, 22 Nov 2023 10:16:47 -0500 Subject: [PATCH 6/6] Change forEach to for..of for index population. --- data.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data.ts b/data.ts index 7f439a4..950f13f 100644 --- a/data.ts +++ b/data.ts @@ -47,8 +47,8 @@ export async function populate(path: string, opts: ClientOptions) { const client = new Client(opts) const batch_size = 10 const batches = chunk(data, batch_size) - batches.forEach(async (batch) => { + for (const batch of batches) { await client.bulk({ body: batch }) - }) + } } }