From 8a32117d765aa4cfd596ae340d686ef6132e37ce Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Thu, 24 Oct 2024 14:04:46 +0200 Subject: [PATCH 01/19] Gitignore .venv for localstack purposes --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index e050832a1d..567c6da55c 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,5 @@ runtime-inputs/ devops/infrastructure joystream.tar.gz + +.venv \ No newline at end of file From 64852518d8a143b2b1633fe97de9a39e27c5f4a1 Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Fri, 25 Oct 2024 15:31:42 +0200 Subject: [PATCH 02/19] Colossus: Archive script --- setup.sh | 4 +- storage-node/CHANGELOG.md | 4 + storage-node/README.md | 585 ++++++--- storage-node/package.json | 5 +- storage-node/src/commands/archive.ts | 349 +++++ storage-node/src/commands/server.ts | 52 +- .../src/services/archive/ArchiveService.ts | 635 +++++++++ .../src/services/archive/SevenZipService.ts | 55 + storage-node/src/services/archive/tasks.ts | 126 ++ storage-node/src/services/archive/tracking.ts | 162 +++ .../helpers/{moveFile.ts => filesystem.ts} | 12 + storage-node/src/services/logger.ts | 4 +- .../{sync => processing}/workingProcess.ts | 67 +- .../queryNode/queries/queries.graphql | 1 + storage-node/src/services/runtime/queries.ts | 5 + .../src/services/s3/AwsConnectionHandler.ts | 193 +++ .../src/services/s3/IConnectionHandler.ts | 60 + .../src/services/sync/acceptPendingObjects.ts | 2 +- .../src/services/sync/cleanupService.ts | 2 +- .../src/services/sync/storageObligations.ts | 39 +- .../src/services/sync/synchronizer.ts | 5 +- storage-node/src/services/sync/tasks.ts | 59 +- .../services/webApi/controllers/filesApi.ts | 2 +- yarn.lock | 1156 +++++++++++++++++ 24 files changed, 3288 insertions(+), 296 deletions(-) create mode 100644 storage-node/src/commands/archive.ts create mode 100644 storage-node/src/services/archive/ArchiveService.ts create mode 100644 storage-node/src/services/archive/SevenZipService.ts create mode 100644 storage-node/src/services/archive/tasks.ts create mode 100644 storage-node/src/services/archive/tracking.ts rename storage-node/src/services/helpers/{moveFile.ts => filesystem.ts} (72%) rename storage-node/src/services/{sync => processing}/workingProcess.ts (66%) create mode 100644 storage-node/src/services/s3/AwsConnectionHandler.ts create mode 100644 storage-node/src/services/s3/IConnectionHandler.ts diff --git a/setup.sh b/setup.sh index 797fa00d2a..552d72d5a8 100755 --- a/setup.sh +++ b/setup.sh @@ -8,7 +8,7 @@ if [[ "$OSTYPE" == "linux-gnu" ]]; then # code build tools sudo apt-get update -y sudo apt-get install -y coreutils clang llvm jq curl gcc xz-utils sudo pkg-config \ - unzip libc6-dev make libssl-dev python3 cmake protobuf-compiler libprotobuf-dev + unzip libc6-dev make libssl-dev python3 cmake protobuf-compiler libprotobuf-dev p7zip-full # Docker: do not replace existing installation to avoid distrupting running containers if ! 
command -v docker &> /dev/null @@ -23,7 +23,7 @@ elif [[ "$OSTYPE" == "darwin"* ]]; then fi # install additional packages brew update - brew install coreutils gnu-tar jq curl llvm gnu-sed cmake protobuf || : + brew install coreutils gnu-tar jq curl llvm gnu-sed cmake protobuf p7zip || : echo "It is recommended to setup Docker desktop from: https://www.docker.com/products/docker-desktop" echo "It is also recommended to install qemu emulators with following command:" echo "docker run --privileged --rm tonistiigi/binfmt --install all" diff --git a/storage-node/CHANGELOG.md b/storage-node/CHANGELOG.md index bba365321f..1c07b042b9 100644 --- a/storage-node/CHANGELOG.md +++ b/storage-node/CHANGELOG.md @@ -1,3 +1,7 @@ +### 4.3.0 + +- Adds `archive` mode / command, which allows downloading, compressing and uploading assigned data objects to an external S3 bucket that can be used as a backup. + ### 4.2.0 - Fix `util:cleanup` script (call `loadDataObjectIdCache` first) diff --git a/storage-node/README.md b/storage-node/README.md index 646f2c6302..52fa902d43 100644 --- a/storage-node/README.md +++ b/storage-node/README.md @@ -147,6 +147,7 @@ There is also an option to run Colossus as [Docker container](../colossus.Docker # CLI Commands +* [`storage-node archive`](#storage-node-archive) * [`storage-node help [COMMAND]`](#storage-node-help-command) * [`storage-node leader:cancel-invite`](#storage-node-leadercancel-invite) * [`storage-node leader:create-bucket`](#storage-node-leadercreate-bucket) @@ -171,6 +172,134 @@ There is also an option to run Colossus as [Docker container](../colossus.Docker * [`storage-node util:multihash`](#storage-node-utilmultihash) * [`storage-node util:verify-bag-id`](#storage-node-utilverify-bag-id) +## `storage-node archive` + +Starts running in a write-only, archive mode (no external API exposed). Downloads, compresses and uploads all assigned data objects to a specified S3 bucket. + +``` +USAGE + $ storage-node archive + +OPTIONS + -b, --buckets=buckets + [default: 1] Comma separated list of bucket IDs to sync. Buckets that are not assigned to + worker are ignored. + If not specified all buckets will be synced. + + -e, --elasticSearchEndpoint=elasticSearchEndpoint + Elasticsearch endpoint (e.g.: http://some.com:8081). + Log level could be set using the ELASTIC_LOG_LEVEL environment variable. + Supported values: warn, error, debug, info. Default:debug + + -h, --help + show CLI help + + -i, --syncInterval=syncInterval + [default: 20] Interval between synchronizations (in minutes) + + -k, --keyFile=keyFile + Path to key file to add to the keyring. + + -l, --logFilePath=logFilePath + Absolute path to the rolling log files. + + -m, --dev + Use development mode + + -n, --logMaxFileNumber=logMaxFileNumber + [default: 7] Maximum rolling log files number. + + -p, --password=password + Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. If + not specified a single password can be set in ACCOUNT_PWD environment variable. + + -q, --storageSquidEndpoint=storageSquidEndpoint + (required) [default: http://localhost:4352/graphql] Storage Squid graphql server endpoint + (e.g.: http://some.com:4352/graphql) + + -r, --syncWorkersNumber=syncWorkersNumber + [default: 8] Sync workers number (max async operations in progress). + + -t, --syncWorkersTimeout=syncWorkersTimeout + [default: 30] Asset downloading timeout for the syncronization (in minutes). + + -u, --apiUrl=apiUrl + [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. 
+ + -w, --worker=worker + (required) Storage provider worker ID + + -x, --logMaxFileSize=logMaxFileSize + [default: 50000000] Maximum rolling log files size in bytes. + + -y, --accountUri=accountUri + Account URI (optional). If not specified a single key can be set in ACCOUNT_URI environment + variable. + + -z, --logFileChangeFrequency=(yearly|monthly|daily|hourly|none) + [default: daily] Log files update frequency. + + --archiveFileSizeLimitMB=archiveFileSizeLimitMB + [default: 1000] Try to avoid creating archive files larger than this size limit (in MB) + unless necessary + + --archiveTrackfileBackupFreqMinutes=archiveTrackfileBackupFreqMinutes + [default: 60] Determines how frequently the archive tracking file (containing information + about .7z files content) should be uploaded to S3 in case a change is detected. + + --awsS3BucketName=awsS3BucketName + (required) Name of the AWS S3 bucket where the files will be stored. + + --awsS3BucketRegion=awsS3BucketRegion + (required) AWS region of the AWS S3 bucket where the files will be stored. + + --elasticSearchIndexPrefix=elasticSearchIndexPrefix + [default: logs-colossus] Elasticsearch index prefix. Node ID will be appended to the prefix. + Default: logs-colossus. Can be passed through ELASTIC_INDEX_PREFIX environment variable. + + --elasticSearchPassword=elasticSearchPassword + Elasticsearch password for basic authentication. Can be passed through ELASTIC_PASSWORD + environment variable. + + --elasticSearchUser=elasticSearchUser + Elasticsearch user for basic authentication. Can be passed through ELASTIC_USER environment + variable. + + --keyStore=keyStore + Path to a folder with multiple key files to load into keystore. + + --localAgeTriggerThresholdMinutes=localAgeTriggerThresholdMinutes + [default: 1440] Compress and upload local data objects to S3 if the oldest of them was + downloaded more than X minutes ago + + --localCountTriggerThreshold=localCountTriggerThreshold + Compress and upload local data objects to S3 if the number of them reaches this threshold. + + --localSizeTriggerThresholdMB=localSizeTriggerThresholdMB + [default: 10000] Compress and upload local data objects to S3 if the combined size of them + reaches this threshold (in MB) + + --tmpDownloadDir=tmpDownloadDir + (required) Directory to store tempory files during sync (absolute path). + + --uploadQueueDir=uploadQueueDir + (required) Directory to store fully downloaded data objects before compressing them and + uploading to S3 (absolute path). + + --uploadQueueDirSizeLimitMB=uploadQueueDirSizeLimitMB + (required) [default: 20000] Limits the total size of files stored in upload queue directory + (in MB). Download of the new objects will be limitted in order to prevent exceeding this + limit. To leave a safe margin of error, it should be set to ~50% of available disk space. + + --uploadRetryInterval=uploadRetryInterval + [default: 3] Interval before retrying failed upload (in minutes) + + --uploadWorkersNumber=uploadWorkersNumber + [default: 4] Upload workers number (max async operations in progress). +``` + +_See code: [src/commands/archive.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/archive.ts)_ + ## `storage-node help [COMMAND]` display help for storage-node @@ -202,18 +331,20 @@ OPTIONS -k, --keyFile=keyFile Path to key file to add to the keyring. -m, --dev Use development mode - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. 
- If not specified a single password can be set in ACCOUNT_PWD environment variable. + -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to + try against all files. If not specified a single password can be + set in ACCOUNT_PWD environment variable. - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in + non-dev environment. - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in ACCOUNT_URI - environment variable. + -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set + in ACCOUNT_URI environment variable. --keyStore=keyStore Path to a folder with multiple key files to load into keystore. ``` -_See code: [src/commands/leader/cancel-invite.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/leader/cancel-invite.ts)_ +_See code: [src/commands/leader/cancel-invite.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/leader/cancel-invite.ts)_ ## `storage-node leader:create-bucket` @@ -231,20 +362,22 @@ OPTIONS -m, --dev Use development mode -n, --number=number Storage bucket max total objects number - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. - If not specified a single password can be set in ACCOUNT_PWD environment variable. + -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to + try against all files. If not specified a single password can be + set in ACCOUNT_PWD environment variable. -s, --size=size Storage bucket max total objects size - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in + non-dev environment. - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in ACCOUNT_URI - environment variable. + -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set + in ACCOUNT_URI environment variable. --keyStore=keyStore Path to a folder with multiple key files to load into keystore. ``` -_See code: [src/commands/leader/create-bucket.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/leader/create-bucket.ts)_ +_See code: [src/commands/leader/create-bucket.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/leader/create-bucket.ts)_ ## `storage-node leader:delete-bucket` @@ -260,18 +393,20 @@ OPTIONS -k, --keyFile=keyFile Path to key file to add to the keyring. -m, --dev Use development mode - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. - If not specified a single password can be set in ACCOUNT_PWD environment variable. + -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to + try against all files. If not specified a single password can be + set in ACCOUNT_PWD environment variable. - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in + non-dev environment. - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in ACCOUNT_URI - environment variable. 
+ -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set + in ACCOUNT_URI environment variable. --keyStore=keyStore Path to a folder with multiple key files to load into keystore. ``` -_See code: [src/commands/leader/delete-bucket.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/leader/delete-bucket.ts)_ +_See code: [src/commands/leader/delete-bucket.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/leader/delete-bucket.ts)_ ## `storage-node leader:invite-operator` @@ -287,20 +422,22 @@ OPTIONS -k, --keyFile=keyFile Path to key file to add to the keyring. -m, --dev Use development mode - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. - If not specified a single password can be set in ACCOUNT_PWD environment variable. + -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to + try against all files. If not specified a single password can be + set in ACCOUNT_PWD environment variable. - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in + non-dev environment. -w, --operatorId=operatorId (required) Storage bucket operator ID (storage group worker ID) - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in ACCOUNT_URI - environment variable. + -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set + in ACCOUNT_URI environment variable. --keyStore=keyStore Path to a folder with multiple key files to load into keystore. ``` -_See code: [src/commands/leader/invite-operator.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/leader/invite-operator.ts)_ +_See code: [src/commands/leader/invite-operator.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/leader/invite-operator.ts)_ ## `storage-node leader:remove-operator` @@ -316,18 +453,20 @@ OPTIONS -k, --keyFile=keyFile Path to key file to add to the keyring. -m, --dev Use development mode - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. - If not specified a single password can be set in ACCOUNT_PWD environment variable. + -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to + try against all files. If not specified a single password can be + set in ACCOUNT_PWD environment variable. - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in + non-dev environment. - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in ACCOUNT_URI - environment variable. + -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set + in ACCOUNT_URI environment variable. --keyStore=keyStore Path to a folder with multiple key files to load into keystore. 
``` -_See code: [src/commands/leader/remove-operator.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/leader/remove-operator.ts)_ +_See code: [src/commands/leader/remove-operator.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/leader/remove-operator.ts)_ ## `storage-node leader:set-bucket-limits` @@ -344,20 +483,22 @@ OPTIONS -m, --dev Use development mode -o, --objects=objects (required) New 'voucher object number limit' value - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. - If not specified a single password can be set in ACCOUNT_PWD environment variable. + -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to + try against all files. If not specified a single password can be + set in ACCOUNT_PWD environment variable. -s, --size=size (required) New 'voucher object size limit' value - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in + non-dev environment. - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in ACCOUNT_URI - environment variable. + -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set + in ACCOUNT_URI environment variable. --keyStore=keyStore Path to a folder with multiple key files to load into keystore. ``` -_See code: [src/commands/leader/set-bucket-limits.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/leader/set-bucket-limits.ts)_ +_See code: [src/commands/leader/set-bucket-limits.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/leader/set-bucket-limits.ts)_ ## `storage-node leader:set-global-uploading-status` @@ -372,20 +513,22 @@ OPTIONS -k, --keyFile=keyFile Path to key file to add to the keyring. -m, --dev Use development mode - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. - If not specified a single password can be set in ACCOUNT_PWD environment variable. + -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to + try against all files. If not specified a single password can be + set in ACCOUNT_PWD environment variable. -s, --set=(on|off) (required) Sets global uploading block (on/off). - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in + non-dev environment. - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in ACCOUNT_URI - environment variable. + -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set + in ACCOUNT_URI environment variable. --keyStore=keyStore Path to a folder with multiple key files to load into keystore. 
``` -_See code: [src/commands/leader/set-global-uploading-status.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/leader/set-global-uploading-status.ts)_ +_See code: [src/commands/leader/set-global-uploading-status.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/leader/set-global-uploading-status.ts)_ ## `storage-node leader:update-bag-limit` @@ -401,18 +544,20 @@ OPTIONS -l, --limit=limit (required) New StorageBucketsPerBagLimit value -m, --dev Use development mode - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. - If not specified a single password can be set in ACCOUNT_PWD environment variable. + -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to + try against all files. If not specified a single password can be + set in ACCOUNT_PWD environment variable. - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in + non-dev environment. - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in ACCOUNT_URI - environment variable. + -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set + in ACCOUNT_URI environment variable. --keyStore=keyStore Path to a folder with multiple key files to load into keystore. ``` -_See code: [src/commands/leader/update-bag-limit.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/leader/update-bag-limit.ts)_ +_See code: [src/commands/leader/update-bag-limit.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/leader/update-bag-limit.ts)_ ## `storage-node leader:update-bags` @@ -449,8 +594,8 @@ OPTIONS Use development mode -p, --password=password - Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. If not specified a single - password can be set in ACCOUNT_PWD environment variable. + Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. If + not specified a single password can be set in ACCOUNT_PWD environment variable. -r, --remove=remove [default: ] Comma separated list of bucket IDs to remove from all bag/s @@ -462,13 +607,14 @@ OPTIONS [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. -y, --accountUri=accountUri - Account URI (optional). If not specified a single key can be set in ACCOUNT_URI environment variable. + Account URI (optional). If not specified a single key can be set in ACCOUNT_URI environment + variable. --keyStore=keyStore Path to a folder with multiple key files to load into keystore. ``` -_See code: [src/commands/leader/update-bags.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/leader/update-bags.ts)_ +_See code: [src/commands/leader/update-bags.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/leader/update-bags.ts)_ ## `storage-node leader:update-blacklist` @@ -484,20 +630,22 @@ OPTIONS -k, --keyFile=keyFile Path to key file to add to the keyring. -m, --dev Use development mode - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. - If not specified a single password can be set in ACCOUNT_PWD environment variable. + -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to + try against all files. 
If not specified a single password can be + set in ACCOUNT_PWD environment variable. -r, --remove=remove [default: ] Content ID to remove - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in + non-dev environment. - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in ACCOUNT_URI - environment variable. + -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set + in ACCOUNT_URI environment variable. --keyStore=keyStore Path to a folder with multiple key files to load into keystore. ``` -_See code: [src/commands/leader/update-blacklist.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/leader/update-blacklist.ts)_ +_See code: [src/commands/leader/update-blacklist.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/leader/update-blacklist.ts)_ ## `storage-node leader:update-bucket-status` @@ -513,20 +661,23 @@ OPTIONS -k, --keyFile=keyFile Path to key file to add to the keyring. -m, --dev Use development mode - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. - If not specified a single password can be set in ACCOUNT_PWD environment variable. + -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to + try against all files. If not specified a single password can be + set in ACCOUNT_PWD environment variable. - -s, --set=(on|off) (required) Sets 'accepting new bags' parameter for the bucket (on/off). + -s, --set=(on|off) (required) Sets 'accepting new bags' parameter for the bucket + (on/off). - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in + non-dev environment. - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in ACCOUNT_URI - environment variable. + -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set + in ACCOUNT_URI environment variable. --keyStore=keyStore Path to a folder with multiple key files to load into keystore. ``` -_See code: [src/commands/leader/update-bucket-status.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/leader/update-bucket-status.ts)_ +_See code: [src/commands/leader/update-bucket-status.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/leader/update-bucket-status.ts)_ ## `storage-node leader:update-data-fee` @@ -542,18 +693,20 @@ OPTIONS -k, --keyFile=keyFile Path to key file to add to the keyring. -m, --dev Use development mode - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. - If not specified a single password can be set in ACCOUNT_PWD environment variable. + -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to + try against all files. If not specified a single password can be + set in ACCOUNT_PWD environment variable. - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in + non-dev environment. - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in ACCOUNT_URI - environment variable. 
+ -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set + in ACCOUNT_URI environment variable. --keyStore=keyStore Path to a folder with multiple key files to load into keystore. ``` -_See code: [src/commands/leader/update-data-fee.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/leader/update-data-fee.ts)_ +_See code: [src/commands/leader/update-data-fee.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/leader/update-data-fee.ts)_ ## `storage-node leader:update-data-object-bloat-bond` @@ -568,20 +721,22 @@ OPTIONS -k, --keyFile=keyFile Path to key file to add to the keyring. -m, --dev Use development mode - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. - If not specified a single password can be set in ACCOUNT_PWD environment variable. + -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to + try against all files. If not specified a single password can be + set in ACCOUNT_PWD environment variable. - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in + non-dev environment. -v, --value=value (required) New data object bloat bond value - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in ACCOUNT_URI - environment variable. + -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set + in ACCOUNT_URI environment variable. --keyStore=keyStore Path to a folder with multiple key files to load into keystore. ``` -_See code: [src/commands/leader/update-data-object-bloat-bond.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/leader/update-data-object-bloat-bond.ts)_ +_See code: [src/commands/leader/update-data-object-bloat-bond.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/leader/update-data-object-bloat-bond.ts)_ ## `storage-node leader:update-dynamic-bag-policy` @@ -597,21 +752,23 @@ OPTIONS -m, --dev Use development mode -n, --number=number (required) New storage buckets number - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try against all - files. If not specified a single password can be set in ACCOUNT_PWD environment - variable. + -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, + to try against all files. If not specified a single password + can be set in ACCOUNT_PWD environment variable. -t, --bagType=(Channel|Member) (required) Dynamic bag type (Channel, Member). - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in + non-dev environment. - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in ACCOUNT_URI - environment variable. + -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be + set in ACCOUNT_URI environment variable. - --keyStore=keyStore Path to a folder with multiple key files to load into keystore. + --keyStore=keyStore Path to a folder with multiple key files to load into + keystore. 
``` -_See code: [src/commands/leader/update-dynamic-bag-policy.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/leader/update-dynamic-bag-policy.ts)_ +_See code: [src/commands/leader/update-dynamic-bag-policy.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/leader/update-dynamic-bag-policy.ts)_ ## `storage-node leader:update-voucher-limits` @@ -627,20 +784,22 @@ OPTIONS -m, --dev Use development mode -o, --objects=objects (required) New 'max voucher object number limit' value - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. - If not specified a single password can be set in ACCOUNT_PWD environment variable. + -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to + try against all files. If not specified a single password can be + set in ACCOUNT_PWD environment variable. -s, --size=size (required) New 'max voucher object size limit' value - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in + non-dev environment. - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in ACCOUNT_URI - environment variable. + -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set + in ACCOUNT_URI environment variable. --keyStore=keyStore Path to a folder with multiple key files to load into keystore. ``` -_See code: [src/commands/leader/update-voucher-limits.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/leader/update-voucher-limits.ts)_ +_See code: [src/commands/leader/update-voucher-limits.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/leader/update-voucher-limits.ts)_ ## `storage-node operator:accept-invitation` @@ -656,24 +815,27 @@ OPTIONS -k, --keyFile=keyFile Path to key file to add to the keyring. -m, --dev Use development mode - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try - against all files. If not specified a single password can be set in + -p, --password=password Password to unlock keyfiles. Multiple passwords + can be passed, to try against all files. If not + specified a single password can be set in ACCOUNT_PWD environment variable. -t, --transactorAccountId=transactorAccountId (required) Transactor account ID (public key) - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev - environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. + Mandatory in non-dev environment. -w, --workerId=workerId (required) Storage operator worker ID - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in - ACCOUNT_URI environment variable. + -y, --accountUri=accountUri Account URI (optional). If not specified a + single key can be set in ACCOUNT_URI + environment variable. - --keyStore=keyStore Path to a folder with multiple key files to load into keystore. + --keyStore=keyStore Path to a folder with multiple key files to + load into keystore. 
``` -_See code: [src/commands/operator/accept-invitation.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/operator/accept-invitation.ts)_ +_See code: [src/commands/operator/accept-invitation.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/operator/accept-invitation.ts)_ ## `storage-node operator:set-metadata` @@ -691,20 +853,22 @@ OPTIONS -k, --keyFile=keyFile Path to key file to add to the keyring. -m, --dev Use development mode - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. - If not specified a single password can be set in ACCOUNT_PWD environment variable. + -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to + try against all files. If not specified a single password can be + set in ACCOUNT_PWD environment variable. - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in + non-dev environment. -w, --workerId=workerId (required) Storage operator worker ID - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in ACCOUNT_URI - environment variable. + -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set + in ACCOUNT_URI environment variable. --keyStore=keyStore Path to a folder with multiple key files to load into keystore. ``` -_See code: [src/commands/operator/set-metadata.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/operator/set-metadata.ts)_ +_See code: [src/commands/operator/set-metadata.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/operator/set-metadata.ts)_ ## `storage-node server` @@ -715,102 +879,109 @@ USAGE $ storage-node server OPTIONS - -b, --buckets=buckets [default: ] Comma separated list of bucket IDs to - service. Buckets that are not assigned to worker are - ignored. If not specified all buckets will be - serviced. + -b, --buckets=buckets + [default: ] Comma separated list of bucket IDs to service. Buckets that are not assigned to + worker are ignored. If not specified all buckets will be serviced. - -c, --cleanup Enable cleanup/pruning of no-longer assigned assets. + -c, --cleanup + Enable cleanup/pruning of no-longer assigned assets. - -d, --uploads=uploads (required) Data uploading directory (absolute path). + -d, --uploads=uploads + (required) Data uploading directory (absolute path). - -e, --elasticSearchEndpoint=elasticSearchEndpoint Elasticsearch endpoint (e.g.: http://some.com:8081). - Log level could be set using the ELASTIC_LOG_LEVEL - environment variable. - Supported values: warn, error, debug, info. - Default:debug + -e, --elasticSearchEndpoint=elasticSearchEndpoint + Elasticsearch endpoint (e.g.: http://some.com:8081). + Log level could be set using the ELASTIC_LOG_LEVEL environment variable. + Supported values: warn, error, debug, info. 
Default:debug - -h, --help show CLI help + -h, --help + show CLI help - -i, --cleanupInterval=cleanupInterval [default: 360] Interval between periodic cleanup - actions (in minutes) + -i, --cleanupInterval=cleanupInterval + [default: 360] Interval between periodic cleanup actions (in minutes) - -i, --syncInterval=syncInterval [default: 20] Interval between synchronizations (in - minutes) + -i, --syncInterval=syncInterval + [default: 20] Interval between synchronizations (in minutes) - -k, --keyFile=keyFile Path to key file to add to the keyring. + -k, --keyFile=keyFile + Path to key file to add to the keyring. - -l, --logFilePath=logFilePath Absolute path to the rolling log files. + -l, --logFilePath=logFilePath + Absolute path to the rolling log files. - -m, --dev Use development mode + -m, --dev + Use development mode - -n, --logMaxFileNumber=logMaxFileNumber [default: 7] Maximum rolling log files number. + -n, --logMaxFileNumber=logMaxFileNumber + [default: 7] Maximum rolling log files number. - -o, --port=port (required) Server port. + -o, --port=port + (required) Server port. - -p, --password=password Password to unlock keyfiles. Multiple passwords can - be passed, to try against all files. If not specified - a single password can be set in ACCOUNT_PWD - environment variable. + -p, --password=password + Password to unlock keyfiles. Multiple passwords can be passed, to try against all files. If + not specified a single password can be set in ACCOUNT_PWD environment variable. - -q, --storageSquidEndpoint=storageSquidEndpoint (required) [default: http://localhost:4352/graphql] - Storage Squid graphql server endpoint (e.g.: - http://some.com:4352/graphql) + -q, --storageSquidEndpoint=storageSquidEndpoint + (required) [default: http://localhost:4352/graphql] Storage Squid graphql server endpoint + (e.g.: http://some.com:4352/graphql) - -r, --syncWorkersNumber=syncWorkersNumber [default: 20] Sync workers number (max async - operations in progress). + -r, --syncWorkersNumber=syncWorkersNumber + [default: 20] Sync workers number (max async operations in progress). - -s, --sync Enable data synchronization. + -s, --sync + Enable data synchronization. - -t, --syncWorkersTimeout=syncWorkersTimeout [default: 30] Asset downloading timeout for the - syncronization (in minutes). + -t, --syncWorkersTimeout=syncWorkersTimeout + [default: 30] Asset downloading timeout for the syncronization (in minutes). - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. - Mandatory in non-dev environment. + -u, --apiUrl=apiUrl + [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev environment. - -w, --worker=worker (required) Storage provider worker ID + -w, --worker=worker + (required) Storage provider worker ID - -x, --logMaxFileSize=logMaxFileSize [default: 50000000] Maximum rolling log files size in - bytes. + -x, --logMaxFileSize=logMaxFileSize + [default: 50000000] Maximum rolling log files size in bytes. - -y, --accountUri=accountUri Account URI (optional). If not specified a single key - can be set in ACCOUNT_URI environment variable. + -y, --accountUri=accountUri + Account URI (optional). If not specified a single key can be set in ACCOUNT_URI environment + variable. - -z, --logFileChangeFrequency=(yearly|monthly|daily|hourly|none) [default: daily] Log files update frequency. + -z, --logFileChangeFrequency=(yearly|monthly|daily|hourly|none) + [default: daily] Log files update frequency. - --elasticSearchIndexPrefix=elasticSearchIndexPrefix Elasticsearch index prefix. 
Node ID will be appended - to the prefix. Default: logs-colossus. Can be passed - through ELASTIC_INDEX_PREFIX environment variable. + --elasticSearchIndexPrefix=elasticSearchIndexPrefix + Elasticsearch index prefix. Node ID will be appended to the prefix. Default: logs-colossus. + Can be passed through ELASTIC_INDEX_PREFIX environment variable. - --elasticSearchPassword=elasticSearchPassword Elasticsearch password for basic authentication. Can - be passed through ELASTIC_PASSWORD environment - variable. + --elasticSearchPassword=elasticSearchPassword + Elasticsearch password for basic authentication. Can be passed through ELASTIC_PASSWORD + environment variable. - --elasticSearchUser=elasticSearchUser Elasticsearch user for basic authentication. Can be - passed through ELASTIC_USER environment variable. + --elasticSearchUser=elasticSearchUser + Elasticsearch user for basic authentication. Can be passed through ELASTIC_USER environment + variable. - --keyStore=keyStore Path to a folder with multiple key files to load into - keystore. + --keyStore=keyStore + Path to a folder with multiple key files to load into keystore. - --maxBatchTxSize=maxBatchTxSize [default: 20] Maximum number of - `accept_pending_data_objects` in a batch - transactions. + --maxBatchTxSize=maxBatchTxSize + [default: 20] Maximum number of `accept_pending_data_objects` in a batch transactions. - --pendingFolder=pendingFolder Directory to store pending files which are uploaded - upload (absolute path). - If not specified a subfolder under the uploads - directory will be used. + --pendingFolder=pendingFolder + Directory to store pending files which are being uploaded (absolute path). + If not specified a subfolder under the uploads directory will be used. - --syncRetryInterval=syncRetryInterval [default: 3] Interval before retrying failed - synchronization run (in minutes) + --syncRetryInterval=syncRetryInterval + [default: 3] Interval before retrying failed synchronization run (in minutes) - --tempFolder=tempFolder Directory to store tempory files during sync and - upload (absolute path). - If not specified a subfolder under the uploads - directory will be used. + --tempFolder=tempFolder + Directory to store tempory files during sync (absolute path). + If not specified a subfolder under the uploads directory will be used. ``` -_See code: [src/commands/server.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/server.ts)_ +_See code: [src/commands/server.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/server.ts)_ ## `storage-node util:cleanup` @@ -822,33 +993,42 @@ USAGE OPTIONS -b, --bucketId=bucketId (required) The buckerId to sync prune/cleanup - -d, --uploads=uploads (required) Data uploading directory (absolute path). + + -d, --uploads=uploads (required) Data uploading directory (absolute + path). + -h, --help show CLI help + -k, --keyFile=keyFile Path to key file to add to the keyring. + -m, --dev Use development mode - -p, --cleanupWorkersNumber=cleanupWorkersNumber [default: 20] Cleanup/Pruning workers number (max async operations in - progress). + -p, --cleanupWorkersNumber=cleanupWorkersNumber [default: 20] Cleanup/Pruning workers number + (max async operations in progress). - -p, --password=password Password to unlock keyfiles. Multiple passwords can be passed, to try - against all files. If not specified a single password can be set in - ACCOUNT_PWD environment variable. + -p, --password=password Password to unlock keyfiles. 
Multiple + passwords can be passed, to try against all + files. If not specified a single password can + be set in ACCOUNT_PWD environment variable. - -q, --queryNodeEndpoint=queryNodeEndpoint [default: http://localhost:4352/graphql] Storage Squid graphql server - endpoint (e.g.: http://some.com:4352/graphql) + -q, --queryNodeEndpoint=queryNodeEndpoint [default: http://localhost:4352/graphql] + Storage Squid graphql server endpoint (e.g.: + http://some.com:4352/graphql) - -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API URL. Mandatory in non-dev - environment. + -u, --apiUrl=apiUrl [default: ws://localhost:9944] Runtime API + URL. Mandatory in non-dev environment. -w, --workerId=workerId (required) Storage node operator worker ID. - -y, --accountUri=accountUri Account URI (optional). If not specified a single key can be set in - ACCOUNT_URI environment variable. + -y, --accountUri=accountUri Account URI (optional). If not specified a + single key can be set in ACCOUNT_URI + environment variable. - --keyStore=keyStore Path to a folder with multiple key files to load into keystore. + --keyStore=keyStore Path to a folder with multiple key files to + load into keystore. ``` -_See code: [src/commands/util/cleanup.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/util/cleanup.ts)_ +_See code: [src/commands/util/cleanup.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/util/cleanup.ts)_ ## `storage-node util:fetch-bucket` @@ -860,28 +1040,33 @@ USAGE OPTIONS -b, --bucketId=bucketId (required) The buckerId to fetch - -d, --uploads=uploads (required) Data uploading directory (absolute path). + + -d, --uploads=uploads (required) Data uploading directory + (absolute path). + -h, --help show CLI help - -n, --syncWorkersNumber=syncWorkersNumber [default: 20] Sync workers number (max async operations in - progress). + -n, --syncWorkersNumber=syncWorkersNumber [default: 20] Sync workers number (max + async operations in progress). - -o, --dataSourceOperatorUrl=dataSourceOperatorUrl Storage node url base (e.g.: http://some.com:3333) to get data - from. + -o, --dataSourceOperatorUrl=dataSourceOperatorUrl Storage node url base (e.g.: + http://some.com:3333) to get data from. - -q, --queryNodeEndpoint=queryNodeEndpoint [default: http://localhost:4352/graphql] Storage Squid graphql - server endpoint (e.g.: http://some.com:4352/graphql) + -q, --queryNodeEndpoint=queryNodeEndpoint [default: http://localhost:4352/graphql] + Storage Squid graphql server endpoint + (e.g.: http://some.com:4352/graphql) - -t, --syncWorkersTimeout=syncWorkersTimeout [default: 30] Asset downloading timeout for the syncronization (in - minutes). + -t, --syncWorkersTimeout=syncWorkersTimeout [default: 30] Asset downloading timeout for + the syncronization (in minutes). - --tempFolder=tempFolder Directory to store tempory files during sync and upload (absolute - path). - ,Temporary directory (absolute path). If not specified a subfolder - under the uploads directory will be used. + --tempFolder=tempFolder Directory to store tempory files during + sync and upload (absolute path). + ,Temporary directory (absolute path). If + not specified a subfolder under the uploads + directory will be used. 
``` -_See code: [src/commands/util/fetch-bucket.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/util/fetch-bucket.ts)_ +_See code: [src/commands/util/fetch-bucket.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/util/fetch-bucket.ts)_ ## `storage-node util:multihash` @@ -896,7 +1081,7 @@ OPTIONS -h, --help show CLI help ``` -_See code: [src/commands/util/multihash.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/util/multihash.ts)_ +_See code: [src/commands/util/multihash.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/util/multihash.ts)_ ## `storage-node util:verify-bag-id` @@ -924,5 +1109,5 @@ OPTIONS - dynamic:member:4 ``` -_See code: [src/commands/util/verify-bag-id.ts](https://github.com/Joystream/joystream/blob/v4.2.0/src/commands/util/verify-bag-id.ts)_ +_See code: [src/commands/util/verify-bag-id.ts](https://github.com/Joystream/joystream/blob/v4.3.0/src/commands/util/verify-bag-id.ts)_ diff --git a/storage-node/package.json b/storage-node/package.json index be0e2a0ee7..9cd12d5801 100644 --- a/storage-node/package.json +++ b/storage-node/package.json @@ -1,7 +1,7 @@ { "name": "storage-node", "description": "Joystream storage subsystem.", - "version": "4.2.0", + "version": "4.3.0", "author": "Joystream contributors", "bin": { "storage-node": "./bin/run" @@ -9,6 +9,8 @@ "bugs": "https://github.com/Joystream/joystream/issues", "dependencies": { "@apollo/client": "^3.3.21", + "@aws-sdk/client-s3": "^3.675.0", + "@aws-sdk/s3-request-presigner": "^3.675.0", "@elastic/ecs-winston-format": "^1.3.1", "@joystream/metadata-protobuf": "^2.15.0", "@joystream/opentelemetry": "1.0.0", @@ -36,6 +38,7 @@ "await-lock": "^2.1.0", "base64url": "^3.0.1", "blake3-wasm": "^2.1.5", + "chokidar": "4.0.1", "cors": "^2.8.5", "cross-fetch": "^3.1.4", "express": "4.17.1", diff --git a/storage-node/src/commands/archive.ts b/storage-node/src/commands/archive.ts new file mode 100644 index 0000000000..b5286284de --- /dev/null +++ b/storage-node/src/commands/archive.ts @@ -0,0 +1,349 @@ +import { flags } from '@oclif/command' +import { ApiPromise } from '@polkadot/api' +import _ from 'lodash' +import path from 'path' +import { v4 as uuidv4 } from 'uuid' +import ApiCommandBase from '../command-base/ApiCommandBase' +import { customFlags } from '../command-base/CustomFlags' +import logger, { DatePatternByFrequency, Frequency, initNewLogger } from '../services/logger' +import { QueryNodeApi } from '../services/queryNode/api' +import { constructBucketToAddressMapping } from '../services/sync/storageObligations' +import { verifyWorkerId } from '../services/runtime/queries' +import { ArchiveService } from '../services/archive/ArchiveService' +import ExitCodes from './../command-base/ExitCodes' +import { IConnectionHandler } from '../services/s3/IConnectionHandler' +import { AwsConnectionHandler } from '../services/s3/AwsConnectionHandler' +import { createDirectory } from '../services/helpers/filesystem' +import { promises as fsp } from 'fs' + +// TODO: Add command for retrieving archive links by object ids. + +/** + * CLI command: + * Starts running in a write-only archive mode (no external API exposed). + * Downloads, compresses and uploads all assigned data objects to a specified S3 bucket. + * + * @remarks + * Shell command: "archive" + */ +export default class Archive extends ApiCommandBase { + static description = + 'Starts running in a write-only, archive mode (no external API exposed). 
' + + 'Downloads, compresses and uploads all assigned data objects to a specified S3 bucket.' + + static flags = { + worker: flags.integer({ + char: 'w', + required: true, + description: 'Storage provider worker ID', + env: 'WORKER_ID', + }), + buckets: customFlags.integerArr({ + char: 'b', + description: + 'Comma separated list of bucket IDs to sync. Buckets that are not assigned to worker are ignored.\n' + + 'If not specified all buckets belonging to the worker will be synced.', + default: process.env.BUCKETS ? _.uniq(process.env.BUCKETS.split(',').map((b) => parseInt(b))) : [], + }), + uploadQueueDir: flags.string({ + description: + 'Directory to store fully downloaded data objects before compressing them and uploading to S3 (absolute path).', + required: true, + env: 'UPLOAD_QUEUE_DIR', + }), + uploadQueueDirSizeLimitMB: flags.integer({ + description: + 'Limits the total size of files stored in upload queue directory (in MB). ' + + 'Download of the new objects may be slowed down in order to try to prevent exceeding this limit. ' + + 'WARNING: To leave a safe margin of error (for compression etc.), it should be set to ~50% of available disk space.', + required: true, + env: 'UPLOAD_QUEUE_DIR_SIZE_LIMIT', + default: 20_000, + }), + tmpDownloadDir: flags.string({ + description: 'Directory to store temporary data (downloads in progress) during sync (absolute path).', + required: true, + env: 'TMP_DOWNLOAD_DIR', + }), + localCountTriggerThreshold: flags.integer({ + required: false, + description: 'Compress and upload all local data objects to S3 if the number of them reaches this threshold.', + env: 'LOCAL_COUNT_TRIGGER_THRESHOLD', + }), + localSizeTriggerThresholdMB: flags.integer({ + description: + 'Compress and upload all local data objects to S3 if the combined size of them reaches this threshold (in MB)', + env: 'LOCAL_SIZE_TRIGGER_THRESHOLD_MB', + default: 10_000, + }), + localAgeTriggerThresholdMinutes: flags.integer({ + description: + 'Compress and upload all local data objects to S3 if the oldest of them was downloaded more than X minutes ago', + env: 'LOCAL_AGE_TRIGGER_THRESHOLD_MINUTES', + default: 24 * 60, + }), + archiveFileSizeLimitMB: flags.integer({ + description: 'Try to avoid creating archive files larger than this size limit (in MB) unless unaviodable.', + default: 1_000, + }), + archiveTrackfileBackupFreqMinutes: flags.integer({ + description: + 'Specifies how frequently the archive tracking file (containing information about .7z files content)' + + " should be uploaded to S3 (in case it's changed).", + env: 'ARCHIVE_TRACKFILE_BACKUP_FREQ_MINUTES', + default: 60, + }), + uploadWorkersNumber: flags.integer({ + required: false, + description: 'Upload workers number (max async operations in progress).', + env: 'UPLOAD_WORKERS_NUMBER', + default: 4, + }), + syncInterval: flags.integer({ + char: 'i', + description: 'Interval between synchronizations (in minutes)', + env: 'SYNC_INTERVAL_MINUTES', + default: 20, + }), + storageSquidEndpoint: flags.string({ + char: 'q', + required: true, + env: 'STORAGE_SQUID_ENDPOINT', + default: 'http://localhost:4352/graphql', + description: 'Storage Squid graphql server endpoint (e.g.: http://some.com:4352/graphql)', + }), + syncWorkersNumber: flags.integer({ + char: 'r', + required: false, + description: 'Sync workers number (max async operations in progress).', + env: 'SYNC_WORKERS_NUMBER', + default: 8, + }), + syncWorkersTimeout: flags.integer({ + char: 't', + required: false, + description: 'Asset downloading timeout for the 
syncronization (in minutes).', + env: 'SYNC_WORKERS_TIMEOUT_MINUTES', + default: 30, + }), + elasticSearchEndpoint: flags.string({ + char: 'e', + required: false, + env: 'ELASTIC_ENDPOINT', + description: `Elasticsearch endpoint (e.g.: http://some.com:8081). +Log level could be set using the ELASTIC_LOG_LEVEL environment variable. +Supported values: warn, error, debug, info. Default:debug`, + }), + elasticSearchIndexPrefix: flags.string({ + required: false, + env: 'ELASTIC_INDEX_PREFIX', + description: + 'Elasticsearch index prefix. Node ID will be appended to the prefix. Default: logs-colossus. Can be passed through ELASTIC_INDEX_PREFIX environment variable.', + default: 'logs-colossus', + }), + elasticSearchUser: flags.string({ + dependsOn: ['elasticSearchEndpoint', 'elasticSearchPassword'], + env: 'ELASTIC_USER', + description: + 'Elasticsearch user for basic authentication. Can be passed through ELASTIC_USER environment variable.', + }), + elasticSearchPassword: flags.string({ + dependsOn: ['elasticSearchEndpoint', 'elasticSearchUser'], + env: 'ELASTIC_PASSWORD', + description: + 'Elasticsearch password for basic authentication. Can be passed through ELASTIC_PASSWORD environment variable.', + }), + logFilePath: flags.string({ + char: 'l', + required: false, + description: `Absolute path to the rolling log files.`, + env: 'LOG_FILE_PATH', + }), + logMaxFileNumber: flags.integer({ + char: 'n', + required: false, + description: `Maximum rolling log files number.`, + env: 'LOG_MAX_FILE_NUMBER', + default: 7, + }), + logMaxFileSize: flags.integer({ + char: 'x', + required: false, + description: `Maximum rolling log files size in bytes.`, + env: 'LOG_MAX_FILE_SIZE', + default: 50_000_000, + }), + logFileChangeFrequency: flags.enum({ + char: 'z', + description: `Log files update frequency.`, + options: Object.keys(DatePatternByFrequency), + required: false, + env: 'LOG_FILE_CHANGE_FREQUENCY', + default: 'daily', + }), + awsS3BucketRegion: flags.string({ + description: 'AWS region of the AWS S3 bucket where the files will be stored.', + env: 'AWS_REGION', + required: true, + }), + awsS3BucketName: flags.string({ + description: 'Name of the AWS S3 bucket where the files will be stored.', + env: 'AWS_BUCKET_NAME', + required: true, + }), + ...ApiCommandBase.flags, + } + + async getSyncableBuckets(api: ApiPromise, qnApi: QueryNodeApi): Promise { + const { flags } = this.parse(Archive) + const workerId = flags.worker + + if (!(await verifyWorkerId(api, workerId))) { + logger.error(`workerId ${workerId} does not exist in the storage working group`) + this.exit(ExitCodes.InvalidWorkerId) + } + + if (!flags.buckets.length) { + logger.info(`No buckets provided. Will use all bucket belonging to worker ${workerId}.`) + } + + const selectedBucketsAndAccounts = await constructBucketToAddressMapping(api, qnApi, workerId, flags.buckets) + const selectedBuckets = selectedBucketsAndAccounts.map(([bucketId]) => bucketId) + const selectedVsProvidedDiff = _.difference( + flags.buckets.map((id) => id.toString()), + selectedBuckets + ) + + if (selectedVsProvidedDiff.length) { + logger.warn( + `Buckets: ${JSON.stringify( + selectedVsProvidedDiff + )} do not belong to worker with ID=${workerId} and will NOT be synced!` + ) + } + + let syncableBuckets = selectedBuckets + if (process.env.DISABLE_BUCKET_AUTH === 'true') { + logger.warn('Bucket authentication is disabled! 
This is not recommended for production use!') + } else { + const keystoreAddresses = this.getUnlockedAccounts() + const bucketsWithKeysInKeyring = selectedBucketsAndAccounts.filter(([bucketId, address]) => { + if (!keystoreAddresses.includes(address)) { + this.warn(`Missing transactor key for bucket ${bucketId}. It will NOT be synced!`) + return false + } + return true + }) + + syncableBuckets = bucketsWithKeysInKeyring.map(([bucketId]) => bucketId) + } + + if (!syncableBuckets.length) { + this.error('No buckets to serve. Exiting...') + } + + if (syncableBuckets.length !== flags.buckets.length) { + logger.warn(`Only ${syncableBuckets.length} out of ${flags.buckets.length} provided buckets will be synced!`) + } + + return syncableBuckets + } + + initLogger(): void { + const { flags } = this.parse(Archive) + if (!_.isEmpty(flags.elasticSearchEndpoint) || !_.isEmpty(flags.logFilePath)) { + initNewLogger({ + elasticSearchlogSource: `StorageProvider_${flags.worker}`, + elasticSearchEndpoint: flags.elasticSearchEndpoint, + elasticSearchIndexPrefix: flags.elasticSearchIndexPrefix, + elasticSearchUser: flags.elasticSearchUser, + elasticSearchPassword: flags.elasticSearchPassword, + filePath: flags.logFilePath, + maxFileNumber: flags.logMaxFileNumber, + maxFileSize: flags.logMaxFileSize, + fileFrequency: flags.logFileChangeFrequency as Frequency, // type checked in the flags.enum + }) + } + } + + async checkAndNormalizeDirs>(dirs: T): Promise { + const dirsSet = new Set() + const resolvedPaths: Record = {} + for (const [dirName, dirPath] of Object.entries(dirs)) { + const resolved = path.resolve(dirPath) + if (dirsSet.has(resolved)) { + this.error(`All specified directories should be unique. ${dirPath} is not.`) + } + dirsSet.add(resolved) + await createDirectory(resolved) + try { + await fsp.access(resolved, fsp.constants.W_OK | fsp.constants.R_OK) + } catch (e) { + this.error(`Cannot access directory ${resolved} for read or write operations: ${e.toString()}`) + } + resolvedPaths[dirName] = resolved + } + + return resolvedPaths as T + } + + async run(): Promise { + const { flags } = this.parse(Archive) + + // Init logger + this.initLogger() + + // Init APIs + logger.info(`Storage Squid endpoint set: ${flags.storageSquidEndpoint}`) + const api = await this.getApi() + const qnApi = new QueryNodeApi(flags.storageSquidEndpoint) + + if (flags.dev) { + await this.ensureDevelopmentChain() + } + + // Try to construct S3 connection handler + const s3ConnectionHandler: IConnectionHandler = new AwsConnectionHandler({ + bucketName: flags.awsS3BucketName, + region: flags.awsS3BucketRegion, + }) + + // Get buckets to sync + const syncableBuckets = await this.getSyncableBuckets(api, qnApi) + logger.info(`Buckets to sync: [${syncableBuckets}]`) + + // Check and normalize input directories + const { tmpDownloadDir, uploadQueueDir } = await this.checkAndNormalizeDirs({ + tmpDownloadDir: flags.tmpDownloadDir, + uploadQueueDir: flags.uploadQueueDir, + }) + + // Build and run archive service + const X_HOST_ID = uuidv4() + const archiveService = new ArchiveService({ + buckets: syncableBuckets.map((id) => id.toString()), + archiveTrackfileBackupFreqMinutes: flags.archiveTrackfileBackupFreqMinutes, + localCountTriggerThreshold: flags.localCountTriggerThreshold, + localSizeTriggerThreshold: flags.localSizeTriggerThresholdMB * 1_000_000, + localAgeTriggerThresholdMinutes: flags.localAgeTriggerThresholdMinutes, + archiveSizeLimit: flags.archiveFileSizeLimitMB * 1_000_000, + uploadDirSizeLimit: 
flags.uploadQueueDirSizeLimitMB * 1_000_000, + uploadQueueDir, + tmpDownloadDir, + s3ConnectionHandler, + queryNodeApi: qnApi, + uploadWorkersNum: flags.uploadWorkersNumber, + hostId: X_HOST_ID, + syncWorkersNum: flags.syncWorkersNumber, + syncWorkersTimeout: flags.syncWorkersTimeout, + syncInterval: flags.syncInterval, + }) + + await archiveService.init() + await archiveService.run() + } + + // Override exiting. + /* eslint-disable @typescript-eslint/no-empty-function */ + async finally(): Promise {} +} diff --git a/storage-node/src/commands/server.ts b/storage-node/src/commands/server.ts index 8b65be0429..6f61b38fc9 100644 --- a/storage-node/src/commands/server.ts +++ b/storage-node/src/commands/server.ts @@ -1,15 +1,13 @@ import { flags } from '@oclif/command' import { ApiPromise } from '@polkadot/api' import { KeyringPair } from '@polkadot/keyring/types' -import { PalletStorageStorageBucketRecord } from '@polkadot/types/lookup' -import fs from 'fs' import _ from 'lodash' import path from 'path' import sleep from 'sleep-promise' import { v4 as uuidv4 } from 'uuid' import ApiCommandBase from '../command-base/ApiCommandBase' import { customFlags } from '../command-base/CustomFlags' -import { loadDataObjectIdCache } from '../services/caching/localDataObjects' +import { addDataObjectIdToCache, loadDataObjectIdCache } from '../services/caching/localDataObjects' import logger, { DatePatternByFrequency, Frequency, initNewLogger } from '../services/logger' import { QueryNodeApi } from '../services/queryNode/api' import { AcceptPendingObjectsService } from '../services/sync/acceptPendingObjects' @@ -18,11 +16,13 @@ import { MINIMUM_REPLICATION_THRESHOLD, performCleanup, } from '../services/sync/cleanupService' -import { getStorageBucketIdsByWorkerId } from '../services/sync/storageObligations' +import { constructBucketToAddressMapping } from '../services/sync/storageObligations' import { PendingDirName, TempDirName, performSync } from '../services/sync/synchronizer' +import { downloadEvents } from '../services/sync/tasks' import { createApp } from '../services/webApi/app' import ExitCodes from './../command-base/ExitCodes' -const fsPromises = fs.promises +import { createDirectory } from '../services/helpers/filesystem' +import { verifyWorkerId } from '../services/runtime/queries' /** * CLI command: @@ -53,11 +53,11 @@ export default class Server extends ApiCommandBase { }), tempFolder: flags.string({ description: - 'Directory to store tempory files during sync and upload (absolute path).\nIf not specified a subfolder under the uploads directory will be used.', + 'Directory to store tempory files during sync (absolute path).\nIf not specified a subfolder under the uploads directory will be used.', }), pendingFolder: flags.string({ description: - 'Directory to store pending files which are uploaded upload (absolute path).\nIf not specified a subfolder under the uploads directory will be used.', + 'Directory to store pending files which are being uploaded (absolute path).\nIf not specified a subfolder under the uploads directory will be used.', }), port: flags.integer({ char: 'o', @@ -284,6 +284,9 @@ Supported values: warn, error, debug, info. Default:debug`, // any assets. 
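
Worth noting here: `verifyWorkerId` and `constructBucketToAddressMapping` are no longer private to `server.ts` but live in `services/runtime/queries` and `services/sync/storageObligations`, so both the `server` and the new `archive` command resolve their operated buckets the same way. A minimal sketch of how a command can reuse them (the `resolveOperatedBuckets` wrapper itself is hypothetical, not part of this patch):

```ts
import { ApiPromise } from '@polkadot/api'
import { QueryNodeApi } from '../services/queryNode/api'
import { verifyWorkerId } from '../services/runtime/queries'
import { constructBucketToAddressMapping } from '../services/sync/storageObligations'

// Hypothetical helper: both commands follow this pattern when deciding which
// buckets they are allowed to operate on.
async function resolveOperatedBuckets(
  api: ApiPromise,
  qnApi: QueryNodeApi,
  workerId: number,
  requestedBuckets: number[]
): Promise<string[]> {
  if (!(await verifyWorkerId(api, workerId))) {
    throw new Error(`Worker ${workerId} does not exist in the storage working group`)
  }
  // [bucketId, transactorAddress] pairs, limited to buckets operated by this worker
  const mapping = await constructBucketToAddressMapping(api, qnApi, workerId, requestedBuckets)
  return mapping.map(([bucketId]) => bucketId)
}
```
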
if (flags.sync && selectedBuckets.length) { logger.info(`Synchronization is Enabled.`) + downloadEvents.on('success', (id) => { + addDataObjectIdToCache(id) + }) setTimeout( // eslint-disable-next-line @typescript-eslint/no-misused-promises async () => @@ -445,38 +448,3 @@ async function runCleanupWithInterval( } } } - -/** - * Creates a directory recursivly. Like `mkdir -p` - * - * @param tempDirName - full path to temporary directory - * @returns void promise. - */ -async function createDirectory(dirName: string): Promise { - logger.info(`Creating directory ${dirName}`) - await fsPromises.mkdir(dirName, { recursive: true }) -} - -async function verifyWorkerId(api: ApiPromise, workerId: number): Promise { - const worker = await api.query.storageWorkingGroup.workerById(workerId) - return worker.isSome -} - -async function constructBucketToAddressMapping( - api: ApiPromise, - qnApi: QueryNodeApi, - workerId: number, - bucketsToServe: number[] -): Promise<[string, string][]> { - const bucketIds = await getStorageBucketIdsByWorkerId(qnApi, workerId) - const buckets: [string, PalletStorageStorageBucketRecord][] = ( - await Promise.all( - bucketIds.map(async (bucketId) => [bucketId, await api.query.storage.storageBucketById(bucketId)] as const) - ) - ) - .filter(([bucketId]) => bucketsToServe.length === 0 || bucketsToServe.includes(parseInt(bucketId))) - .filter(([, optBucket]) => optBucket.isSome && optBucket.unwrap().operatorStatus.isStorageWorker) - .map(([bucketId, optBucket]) => [bucketId, optBucket.unwrap()]) - - return buckets.map(([bucketId, bucket]) => [bucketId, bucket.operatorStatus.asStorageWorker[1].toString()]) -} diff --git a/storage-node/src/services/archive/ArchiveService.ts b/storage-node/src/services/archive/ArchiveService.ts new file mode 100644 index 0000000000..af28b2c364 --- /dev/null +++ b/storage-node/src/services/archive/ArchiveService.ts @@ -0,0 +1,635 @@ +import { promises as fsp } from 'fs' +import path from 'path' +import logger from '../logger' +import { CompressFilesTask, UploadArchiveFileTask } from './tasks' +import { WorkingStack, TaskProcessorSpawner } from '../processing/workingProcess' +import { downloadEvents, DownloadFileTask } from '../sync/tasks' +import _ from 'lodash' +import { IConnectionHandler } from '../s3/IConnectionHandler' +import { + ObjectTrackingService, + ArchivesTrackingService, + ARCHIVES_TRACKING_FILENAME, + OBJECTS_TRACKING_FILENAME, +} from './tracking' +import { QueryNodeApi } from '../queryNode/api' +import { getStorageObligationsFromRuntime } from '../sync/storageObligations' +import { getDownloadTasks } from '../sync/synchronizer' +import sleep from 'sleep-promise' +import { Logger } from 'winston' + +type DataObjectData = { + id: string + size: number + birthtime: Date +} + +/** + * Manages downloaded data objects before the upload threshold is reached. 
+ */ +class DataObjectsQueue { + private logger: Logger + private dataObjects: Map = new Map() + private dataDir: string + + constructor(dataDir: string) { + this.dataDir = dataDir + this.logger = logger.child('DataObjectsQueue') + } + + public get totalSize() { + return Array.from(this.dataObjects.values()).reduce((a, b) => a + b.size, 0) + } + + public get objectsCount(): number { + return this.dataObjects.size + } + + public get oldestObjectBirthtime(): Date | undefined { + return _.minBy(Array.from(this.dataObjects.values()), (o) => o.birthtime)?.birthtime + } + + public async add(dataObjectId: string): Promise { + const { size, birthtime } = await fsp.stat(path.join(this.dataDir, dataObjectId)) + this.dataObjects.set(dataObjectId, { id: dataObjectId, size, birthtime }) + } + + public has(dataObjectId: string): boolean { + return this.dataObjects.has(dataObjectId) + } + + public remove(dataObjectId: string): void { + this.dataObjects.delete(dataObjectId) + } + + // Pop data objects sorted by id until size limit is reached + public popUntilSizeLimit(objectsSizeLimit: number): DataObjectData[] { + const dataObjects = Array.from(this.dataObjects.values()) + // Objects are sorted from highest to lowest id, + // so that the objects with lowest id are removed first + dataObjects.sort((a, b) => parseInt(b.id) - parseInt(a.id)) + const removedItems = [] + let combinedSize = 0 + while (combinedSize < objectsSizeLimit) { + const removedItem = dataObjects.pop() + if (!removedItem) { + break + } + this.remove(removedItem.id) + removedItems.push(removedItem) + combinedSize += removedItem.size + } + + return removedItems + } + + public empty(objectsSizeLimit: number): DataObjectData[][] { + const { objectsCount, totalSize } = this + this.logger.debug(`Emptying local data objects queue. objects_count=${objectsCount}, total_size=${totalSize}`) + const batches: DataObjectData[][] = [] + let dataObjectsBatch: DataObjectData[] = this.popUntilSizeLimit(objectsSizeLimit) + while (dataObjectsBatch.length) { + batches.push(dataObjectsBatch) + this.logger.debug(`Prepared batch: ${dataObjectsBatch.map((o) => o.id).join(', ')}`) + dataObjectsBatch = this.popUntilSizeLimit(objectsSizeLimit) + } + this.logger.debug(`Local data objects queue emptied. 
Prepared ${batches.length} batches.`) + return batches + } + + public get oldestObjectAgeMinutes() { + if (this.oldestObjectBirthtime) { + const diffMs = new Date().getTime() - this.oldestObjectBirthtime.getTime() + return diffMs / 1000 / 60 + } + return 0 + } +} + +type ArchiveServiceParams = { + // Supported buckets + buckets: string[] + // Upload trigger Thresholds + localCountTriggerThreshold: number | undefined + localSizeTriggerThreshold: number + localAgeTriggerThresholdMinutes: number + // Size limits + archiveSizeLimit: number + uploadDirSizeLimit: number + // Directory paths + uploadQueueDir: string + tmpDownloadDir: string + // API's + s3ConnectionHandler: IConnectionHandler + queryNodeApi: QueryNodeApi + // Upload tasks config + uploadWorkersNum: number + // Sync tasks config + hostId: string + syncWorkersNum: number + syncWorkersTimeout: number + syncInterval: number + // Archive tracking backup + archiveTrackfileBackupFreqMinutes: number +} + +export class ArchiveService { + private logger: Logger + // Buckets + private buckets: string[] + // Thresholds + private localCountTriggerThreshold: number | undefined + private localSizeTriggerThreshold: number + private localAgeTriggerThresholdMinutes: number + // Size limits + private archiveSizeLimit: number + private uploadDirSizeLimit: number + // Directory paths + private uploadQueueDir: string + private tmpDownloadDir: string + // API's and services + private queryNodeApi: QueryNodeApi + private s3ConnectionHandler: IConnectionHandler + // Tracking services + private objectTrackingService: ObjectTrackingService + private archivesTrackingService: ArchivesTrackingService + // Archive tracking backup + private archiveTrackfileBackupFreqMinutes: number + private archiveTrackfileLastMtime: Date | undefined + // Upload tasks + private preparingForUpload = false + private uploadWorkersNum: number + private uploadWorkingStack: WorkingStack + private uploadProcessorSpawner: TaskProcessorSpawner | undefined + private syncProcessorSpawner: TaskProcessorSpawner | undefined + private dataObjectsQueue: DataObjectsQueue + // Sync tasks + private hostId: string + private syncWorkersNum: number + private syncWorkingStack: WorkingStack + private syncQueueObjectsSize = 0 + private syncWorkersTimeout: number + private syncInterval: number + + constructor(params: ArchiveServiceParams) { + // From params: + this.buckets = params.buckets + this.localCountTriggerThreshold = params.localCountTriggerThreshold + this.localSizeTriggerThreshold = params.localSizeTriggerThreshold + this.localAgeTriggerThresholdMinutes = params.localAgeTriggerThresholdMinutes + this.archiveSizeLimit = params.archiveSizeLimit + this.uploadDirSizeLimit = params.uploadDirSizeLimit + this.uploadQueueDir = params.uploadQueueDir + this.tmpDownloadDir = params.tmpDownloadDir + this.s3ConnectionHandler = params.s3ConnectionHandler + this.queryNodeApi = params.queryNodeApi + this.uploadWorkersNum = params.uploadWorkersNum + this.hostId = params.hostId + this.syncWorkersNum = params.syncWorkersNum + this.syncWorkersTimeout = params.syncWorkersTimeout + this.syncInterval = params.syncInterval + this.archiveTrackfileBackupFreqMinutes = params.archiveTrackfileBackupFreqMinutes + // Other: + this.objectTrackingService = new ObjectTrackingService(this.uploadQueueDir) + this.archivesTrackingService = new ArchivesTrackingService(this.uploadQueueDir) + this.dataObjectsQueue = new DataObjectsQueue(this.uploadQueueDir) + this.uploadWorkingStack = new WorkingStack() + this.syncWorkingStack = new 
WorkingStack() + this.logger = logger.child({ label: 'ArchiveService' }) + } + + /** + * Starts infinite task processing loop and returns the TaskProcessorSpawner instance + */ + private startProcessorSpawner(name: string, stack: WorkingStack, workersNum: number) { + const spawner = new TaskProcessorSpawner(stack, workersNum, false) + spawner + .process() + .then(() => { + this.logger.error(`${name} task processing loop returned unexpectedly!`) + process.exit(1) + }) + .catch((e) => { + this.logger.error(`${name} task processing loop broken: ${e.toString()}`) + process.exit(1) + }) + + return spawner + } + + /** + * Initializes downloadEvent handlers and archive trackfile backup interval. + */ + private installTriggers(): void { + downloadEvents.on('success', (dataObjectId, size) => { + this.logger.debug(`Download success event received for object: ${dataObjectId}`) + this.handleSuccessfulDownload(dataObjectId).catch((e) => { + this.logger.error(`Critical error on handleSuccessfulDownload: ${e.toString()}`) + process.exit(1) + }) + this.syncQueueObjectsSize -= size + }) + downloadEvents.on('fail', (dataObjectId, size) => { + this.syncQueueObjectsSize -= size + }) + setInterval(() => { + this.backupArchiveTrackfile().catch((e) => { + this.logger.error(`Failed to upload archive trackfile backup to S3: ${e.toString()}`) + }) + }, this.archiveTrackfileBackupFreqMinutes * 60_000) + } + + /** + * Uploads a backup of the archive trackfile to S3. + */ + protected async backupArchiveTrackfile(): Promise { + const trackfilePath = this.archivesTrackingService.getTrackfilePath() + const lastModified = (await fsp.stat(trackfilePath)).mtime + if (!this.archiveTrackfileLastMtime || lastModified.getTime() > this.archiveTrackfileLastMtime.getTime()) { + this.logger.info('Backing up the archive trackfile...') + await this.s3ConnectionHandler.uploadFileToRemoteBucket(path.basename(trackfilePath), trackfilePath) + this.archiveTrackfileLastMtime = lastModified + } + } + + /** + * Waits until there are no compression / upload / download tasks in progress. + */ + public async noPendingTasks(): Promise { + while (!this.uploadProcessorSpawner?.isIdle || !this.syncProcessorSpawner?.isIdle || this.preparingForUpload) { + await sleep(1000) + } + } + + /** + * Starts the core processing loop divided into 4 stages: + * 1. Data integrity check: Checks the uploadQueueDir, cleaning up + * any corrupted data and re-scheduling failed uploads. + * 2. Sync stage: Downloads new objects and uploads to S3 once upload + * thresholds are reached. + * 3. Remaining uploads stage: Checks for upload triggers + * (for example, whether the data object age threshold is reached) + * and uploads any remaining data objects if needed. + * 4. Idle stage: Waits out the syncInterval. + */ + public async run(): Promise { + // Start processing loops and install triggers + this.uploadProcessorSpawner = this.startProcessorSpawner('Sync', this.syncWorkingStack, this.syncWorkersNum) + this.syncProcessorSpawner = this.startProcessorSpawner('Upload', this.uploadWorkingStack, this.uploadWorkersNum) + this.installTriggers() + + while (true) { + this.logger.info('Runnning data integrity check...') + try { + // Run data integrity check (WARNING: It assumes no there are not tasks in progress!) 
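
To make the size bookkeeping in `installTriggers` easier to follow: the `downloadEvents` emitter (added to `sync/tasks.ts` further down in this patch) carries the object's expected size with every `success`/`fail` event, so the service can keep a running total of bytes still reserved by in-flight downloads. A stripped-down, standalone sketch of that accounting (illustrative only):

```ts
import { EventEmitter } from 'node:events'

// Same event shape as the emitter added in services/sync/tasks.ts:
// both events carry (dataObjectId, expectedSize).
const downloadEvents = new EventEmitter<{ success: [string, number]; fail: [string, number] }>()

let syncQueueObjectsSize = 0 // bytes reserved by scheduled-but-unfinished downloads

function scheduleDownload(dataObjectId: string, size: number): void {
  syncQueueObjectsSize += size // reserved when the task is added to the working stack
}

downloadEvents.on('success', (_dataObjectId, size) => {
  syncQueueObjectsSize -= size // the object now counts towards the upload dir size instead
})
downloadEvents.on('fail', (_dataObjectId, size) => {
  syncQueueObjectsSize -= size // reservation released; the object is retried on the next sync
})
```
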
+ await this.dataIntegrityCheck() + } catch (e) { + this.logger.error(`Data integrity check failed: ${e.toString()}`) + } + this.logger.info('Data integrity check done.') + + this.logger.info('Started syncing...') + try { + await this.performSync() + } catch (e) { + this.logger.error(`Sync failed: ${e.toString()}`) + } + // Wait until the full sync and all triggered uploads are done + // (this may take a very long time during first sync) + await this.noPendingTasks() + this.logger.info('Sync done.') + + this.logger.info('Checking for uploads to prepare...') + try { + await this.uploadIfReady() + } catch (e) { + this.logger.error(`uploadIfReady failed: ${e.toString()}`) + } + // Wait until remaining uploads are done + await this.noPendingTasks() + this.logger.info('Uploads check done.') + + this.logger.info(`All done, pausing for ${this.syncInterval} minute(s)...`) + // Wait out the syncInterval + await sleep(this.syncInterval * 60_000) + } + } + + /** + * Adds new download task to the working stack + */ + private async addDownloadTask(task: DownloadFileTask, size: number) { + this.syncQueueObjectsSize += size + await this.syncWorkingStack.add([task]) + } + + /** + * Calculates upload queue directory size + * + * @throws Error If there's some problem with file access + */ + private async getUploadDirSize(): Promise { + const uploadDirObjects = await fsp.readdir(this.uploadQueueDir, { withFileTypes: true }) + const uploadFileStats = await Promise.all( + uploadDirObjects.filter((o) => o.isFile()).map((f) => fsp.stat(path.join(this.uploadQueueDir, f.name))) + ) + return uploadFileStats.reduce((a, b) => a + b.size, 0) + } + + /** + * Runs the data synchronization workflow. + * Adds tasks to the sync working stack while trying to prevent + * exceeding the upload queue directory size limit. + * DOES NOT WAIT UNTIL ALL OF THE TASKS ARE DONE! + * + * @throws Error If there's an issue w/ file access or the query node + */ + public async performSync(): Promise { + const model = await getStorageObligationsFromRuntime(this.queryNodeApi, this.buckets) + + const assignedObjects = model.dataObjects + const added = assignedObjects.filter((obj) => !this.objectTrackingService.isTracked(obj.id)) + added.sort((a, b) => parseInt(b.id) - parseInt(a.id)) + + this.logger.info(`Sync - new objects: ${added.length}`) + + // Add new download tasks while the upload dir size limit allows + while (added.length) { + const uploadDirectorySize = await this.getUploadDirSize() + while (true) { + const object = added.pop() + if (!object) { + break + } + if (object.size + uploadDirectorySize + this.syncQueueObjectsSize > this.uploadDirSizeLimit) { + this.logger.debug( + `Waiting for some disk space to free ` + + `(upload_dir: ${uploadDirectorySize} / ${this.uploadDirSizeLimit}, ` + + `sync_q=${this.syncQueueObjectsSize}, obj_size=${object.size})... ` + ) + added.push(object) + await sleep(60_000) + break + } + const [downloadTask] = await getDownloadTasks( + model, + this.buckets, + [object], + this.uploadQueueDir, + this.tmpDownloadDir, + this.syncWorkersTimeout, + this.hostId + ) + await this.addDownloadTask(downloadTask, object.size) + } + } + } + + /** + * Runs the uploadQueueDir data integrity check, removing corrupted data + * and re-scheduling failed uploads. + * + * @throws Error In case of an upload directory access issue. 
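
The throttling condition inside `performSync` above boils down to a single predicate: a download is only scheduled if the object still fits under `uploadDirSizeLimit` after counting both bytes already on disk and bytes reserved by in-flight downloads; otherwise the loop sleeps and re-checks. An illustrative, self-contained restatement:

```ts
// Back-pressure rule used when scheduling downloads (illustrative restatement).
function fitsUnderUploadDirLimit(
  objectSize: number, // size of the next candidate object (bytes)
  uploadDirSize: number, // bytes currently stored in the upload queue directory
  inFlightBytes: number, // bytes reserved by downloads still in progress
  uploadDirSizeLimit: number // --uploadQueueDirSizeLimitMB * 1_000_000
): boolean {
  return objectSize + uploadDirSize + inFlightBytes <= uploadDirSizeLimit
}

// Example: 20 GB limit, 15 GB on disk, 4 GB in flight -> a 2 GB object must wait.
fitsUnderUploadDirLimit(2e9, 15e9, 4e9, 20e9) // false
```
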
+ */ + private async dataIntegrityCheck(): Promise { + const uploadDirContents = await fsp.readdir(this.uploadQueueDir, { withFileTypes: true }) + for (const item of uploadDirContents) { + if (item.isFile()) { + const [name, ext1, ext2] = item.name.split('.') + // 1. If file name is an int and has no ext: We assume it's a fully downloaded data object + if (parseInt(name).toString() === name && !ext1) { + const dataObjectId = name + // 1.1. If the object is not in dataObjectsQueue: remove + if (!this.dataObjectsQueue.has(dataObjectId)) { + this.logger.error( + `Data object ${dataObjectId} found in the directory, but not in internal upload queue. Removing...` + ) + await this.tryRemovingLocalDataObject(dataObjectId) + } + // 1.2. If the object is not tracked by objectTrackingService: remove + else if (!this.objectTrackingService.isTracked(dataObjectId)) { + this.logger.error( + `Data object ${dataObjectId} found in the directory, but not in tracking service. Removing...` + ) + await this.tryRemovingLocalDataObject(dataObjectId) + } + } + // 2. If file is .7z: We assume it's a valid archive with data objects + else if (ext1 === '7z') { + if (!this.archivesTrackingService.isTracked(item.name)) { + // 2.1. If not tracked by archiveTrackingService - try to re-upload: + this.logger.warn(`Found unuploaded archive: ${item.name}. Scheduling for re-upload...`) + await this.uploadWorkingStack.add([ + new UploadArchiveFileTask( + path.join(this.uploadQueueDir, item.name), + item.name, + this.uploadQueueDir, + this.archivesTrackingService, + this.s3ConnectionHandler + ), + ]) + // 2.2. If it's already tracked by archiveTrackingService (already uploaded): remove + } else { + this.logger.warn(`Found already uploaded archive: ${item.name}. Removing...`) + await this.tryRemovingLocalFile(path.join(this.uploadQueueDir, item.name)) + } + // 3. If file is .tmp.7z: remove + } else if (ext1 === 'tmp' && ext2 === '7z') { + this.logger.warn(`Found broken archive: ${item.name}. Removing...`) + await this.tryRemovingLocalFile(path.join(this.uploadQueueDir, item.name)) + } else if (item.name !== ARCHIVES_TRACKING_FILENAME && item.name !== OBJECTS_TRACKING_FILENAME) { + this.logger.warn(`Found unrecognized file: ${item.name}`) + } + } else { + this.logger.warn(`Found unrecognized subdirectory: ${item.name}`) + } + } + } + + /** + * Discover data objects present in the uploadQueueDir and + * initialize the DataObjectsQueue with all the objects found. + * (only executed during startup) + */ + private async initDataObjectsQueue(): Promise { + this.logger.debug('Initializing data objects queue...') + const uploadDirContents = await fsp.readdir(this.uploadQueueDir, { withFileTypes: true }) + for (const item of uploadDirContents) { + if (item.isFile()) { + const [name, ext] = item.name.split('.') + // If file name is an int and has no ext: Process as new data object + if (parseInt(name).toString() === name && !ext) { + await this.processNewDataObject(item.name) + } + } + } + this.logger.debug('Done initializing data objects queue.') + } + + /** + * Initialize the ArchiveService and its child services. + * + * @throws Error In case one of the services fails to initialize. 
+ */ + public async init(): Promise { + try { + this.logger.info('Initializing...') + await this.objectTrackingService.init() + await this.archivesTrackingService.init() + await this.initDataObjectsQueue() + this.logger.info('Done initializing.') + } catch (e) { + this.logger.error(`ArchiveService failed to initialize: ${e.toString()}`) + process.exit(1) + } + } + + /** + * Try removing a local file and log error if it fails. + */ + private async tryRemovingLocalFile(filePath: string, force = true) { + try { + await fsp.rm(filePath, { force }) + } catch (e) { + this.logger.error(`Failed to remove local file (${filePath}): ${e.toString()}`) + } + } + + /** + * Try removing a data object and all associated state. + * Log error if it fails. + */ + private async tryRemovingLocalDataObject(dataObjectId: string): Promise { + this.logger.info(`Removing object ${dataObjectId}...`) + this.dataObjectsQueue.remove(dataObjectId) + try { + await this.objectTrackingService.untrack(dataObjectId) + } catch (e) { + this.logger.error(`Failed to untrack local object ${dataObjectId}`) + } + const localObjectPath = path.join(this.uploadQueueDir, dataObjectId) + await this.tryRemovingLocalFile(localObjectPath) + } + + /** + * Process a new data object by adding it to tracking service and data objects queue. + * Log error and try to remove the object if it fails + * (it should be re-downloaded in this case) + */ + public async processNewDataObject(dataObjectId: string): Promise { + this.logger.debug(`Processing new data object: ${dataObjectId}`) + try { + await this.objectTrackingService.track(dataObjectId) + await this.dataObjectsQueue.add(dataObjectId) + } catch (e) { + this.logger.error(`ArchiveService couldn't proccess data object (${dataObjectId}): ${e.toString()}`) + this.logger.warn('Object will be removed...') + await this.tryRemovingLocalDataObject(dataObjectId) + } + } + + /** + * Check if any of the upload thresholds have been reached. + */ + public checkThresholds(): boolean { + const { + objectsCount: localObjectsCount, + totalSize: localObjectsTotalSize, + oldestObjectAgeMinutes: localObjectsMaxAge, + } = this.dataObjectsQueue + + if (localObjectsTotalSize >= this.localSizeTriggerThreshold) { + this.logger.info( + `Total objects size threshold reached (${localObjectsTotalSize} B / ${this.localSizeTriggerThreshold} B)` + ) + return true + } else if (this.localCountTriggerThreshold && localObjectsCount >= this.localCountTriggerThreshold) { + this.logger.info( + `Total objects count threshold reached (${localObjectsCount} / ${this.localCountTriggerThreshold})` + ) + return true + } else if (localObjectsMaxAge >= this.localAgeTriggerThresholdMinutes) { + this.logger.info( + `Oldest object age threshold reached (${localObjectsMaxAge}m / ${this.localAgeTriggerThresholdMinutes}m)` + ) + return true + } + return false + } + + /** + * Trigger compression & upload workflow if any of the upload thresholds + * have been reached. + */ + public async uploadIfReady(): Promise { + if (this.checkThresholds()) { + const dataObjectBatches = this.dataObjectsQueue.empty(this.archiveSizeLimit) + await this.prepareAndUploadBatches(dataObjectBatches) + } + } + + /** + * Process data object after successful download and check if any trigger + * compression & upload workflow if any of the thresholds have been reached. 
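
Since `checkThresholds` is the gate for the whole compression-and-upload pipeline, here is the decision condensed into a pure function (an illustrative restatement; the real thresholds come from the `localSizeTriggerThresholdMB`, `localCountTriggerThreshold` and `localAgeTriggerThresholdMinutes` flags):

```ts
type LocalQueueStats = {
  count: number // number of fully downloaded, not-yet-archived objects
  totalSize: number // their combined size in bytes
  oldestAgeMinutes: number // age of the oldest object in the queue
}

// Any single trigger is enough to start compressing and uploading.
function uploadTriggered(
  stats: LocalQueueStats,
  sizeThreshold: number, // localSizeTriggerThresholdMB * 1_000_000
  countThreshold: number | undefined, // localCountTriggerThreshold (optional)
  ageThresholdMinutes: number // localAgeTriggerThresholdMinutes
): boolean {
  if (stats.totalSize >= sizeThreshold) return true
  if (countThreshold !== undefined && stats.count >= countThreshold) return true
  if (stats.oldestAgeMinutes >= ageThresholdMinutes) return true
  return false
}
```
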
+ */ + private async handleSuccessfulDownload(dataObjectId: string): Promise { + await this.processNewDataObject(dataObjectId) + await this.uploadIfReady() + } + + /** + * Compresses batches of data objects into 7zip archives and + * schedules the uploads to S3. + */ + public async prepareAndUploadBatches(dataObjectBatches: DataObjectData[][]): Promise { + if (!dataObjectBatches.length) { + this.logger.warn('prepareAndUploadBatches: No batches received.') + return + } + + this.preparingForUpload = true + + this.logger.info(`Preparing ${dataObjectBatches.length} batches for upload...`) + const compressionTasks: CompressFilesTask[] = [] + for (const batch of dataObjectBatches) { + const compressionTask = new CompressFilesTask( + this.uploadQueueDir, + batch.map((o) => o.id) + ) + compressionTasks.push(compressionTask) + } + + // We run compression tasks one by one, because they spawn 7zip, which uses all available threads + // by default, ie. we probably won't benefit from running multiple 7zip tasks in parallel. + this.logger.info(`Creating ${compressionTasks.length} archive file(s)...`) + const archiveFiles = [] + for (const compressionTask of compressionTasks) { + this.logger.debug(compressionTask.description()) + try { + await compressionTask.execute() + archiveFiles.push(compressionTask.getArchiveFilePath()) + } catch (e) { + this.logger.error(`Data objects compression task failed: ${e.toString()}`) + } + } + + // After collecting the archive files we add them to upload queue + const uploadFileTasks = archiveFiles.map( + (filePath) => + new UploadArchiveFileTask( + filePath, + path.basename(filePath), + this.uploadQueueDir, + this.archivesTrackingService, + this.s3ConnectionHandler + ) + ) + + if (uploadFileTasks.length) { + this.logger.info(`Scheduling ${uploadFileTasks.length} uploads to S3...`) + await this.uploadWorkingStack.add(uploadFileTasks) + } + + this.preparingForUpload = false + } +} diff --git a/storage-node/src/services/archive/SevenZipService.ts b/storage-node/src/services/archive/SevenZipService.ts new file mode 100644 index 0000000000..9a6c1a8d51 --- /dev/null +++ b/storage-node/src/services/archive/SevenZipService.ts @@ -0,0 +1,55 @@ +import path from 'path' +import internal from 'stream' +import { ChildProcessByStdio, spawn, exec } from 'child_process' +import { promisify } from 'util' +import logger from '../logger' + +const execPromise = promisify(exec) + +export class SevenZipService { + public spawnCompressionProcess( + archiveFilePath: string, + compressFilePaths: string[], + onClose: (exitCode: number) => unknown + ): ChildProcessByStdio { + const p7z = spawn( + '7z', + [ + 'a', // Create an archive + '-mx=5', // Compression level (1-9) + '-ms=on', // Enable solid mode + '-y', // Answer "yes" to any prompts (like overriding existing archive file etc.) 
+ '-bb0', // Output error messages only + '-bd', // Disable progress indicator + archiveFilePath, // Archive file path + ...compressFilePaths, // Files to include in the archive + ], + { + // Ignore stdin and stdout, pipe stderr + stdio: ['ignore', 'ignore', 'pipe'], + } + ) + p7z.stderr.on('data', (data) => { + logger.error(`7zip stderr: ${data}`) + }) + p7z.on('error', (error) => { + logger.error(`7zip spawn error: ${error.toString()}`) + }) + // Close will be emitted even if there was an error + p7z.on('close', onClose) + return p7z + } + + public async listFiles(archiveFilePath: string): Promise { + try { + const { stdout } = await execPromise(`7z l -ba ${archiveFilePath} | awk '{print $NF}'`) + const files = stdout + .trim() + .split('\n') + .map((o) => path.basename(o.trim())) + return files + } catch (e) { + throw new Error(`Cannot list archive files in ${archiveFilePath}: ${e.toString()}`) + } + } +} diff --git a/storage-node/src/services/archive/tasks.ts b/storage-node/src/services/archive/tasks.ts new file mode 100644 index 0000000000..28a6a3a5b6 --- /dev/null +++ b/storage-node/src/services/archive/tasks.ts @@ -0,0 +1,126 @@ +import { promises as fsp } from 'fs' +import { Task } from '../processing/workingProcess' +import _ from 'lodash' +import path from 'path' +import logger from '../../services/logger' +import { blake2AsHex } from '@polkadot/util-crypto' +import { IConnectionHandler } from '../s3/IConnectionHandler' +import { SevenZipService } from './SevenZipService' +import { ArchivesTrackingService } from './tracking' + +/** + * Compresses provided files into a 7zip archive and removes them. + */ +export class CompressFilesTask implements Task { + private dataObjectPaths: string[] + private archiveFileName: string + private tmpArchiveFilePath: string + private archiveFilePath: string + private _7z: SevenZipService + + constructor(private uploadsDirectory: string, private dataObjectIds: string[]) { + this.archiveFileName = blake2AsHex(_.sortBy(this.dataObjectIds, (id) => parseInt(id)).join(',')).substring(2) + this.tmpArchiveFilePath = path.join(this.uploadsDirectory, `${this.archiveFileName}.tmp.7z`) + this.archiveFilePath = path.join(this.uploadsDirectory, `${this.archiveFileName}.7z`) + this.dataObjectPaths = dataObjectIds.map((id) => path.join(uploadsDirectory, id)) + this._7z = new SevenZipService() + } + + public description(): string { + return `Compressing data objects: (${this.dataObjectIds.join(', ')})...` + } + + public getArchiveFilePath(): string { + return this.archiveFilePath + } + + private async verifyAndMoveArchive(): Promise { + try { + await fsp.access(this.tmpArchiveFilePath, fsp.constants.W_OK | fsp.constants.R_OK) + } catch (e) { + throw new Error(`7z archive access error: ${e.toString()}`) + } + + const packedObjects = await this._7z.listFiles(this.tmpArchiveFilePath) + if (_.difference(this.dataObjectIds, packedObjects).length) { + throw new Error(`7z archive is missing some files`) + } + + try { + await fsp.rename(this.tmpArchiveFilePath, this.archiveFilePath) + } catch (e) { + throw new Error(`Cannot rename ${this.tmpArchiveFilePath} to ${this.archiveFilePath}: ${e.toString()}`) + } + } + + private async clenaup(): Promise { + // Remove packed objects from uploadsDir + try { + await Promise.all(this.dataObjectPaths.map((p) => fsp.rm(p))) + } catch (e) { + logger.error(`Couldn't fully cleanup files after compression: ${e.toString()}`) + } + } + + public async execute(): Promise { + return new Promise((resolve, reject) => { + 
this._7z.spawnCompressionProcess(this.tmpArchiveFilePath, this.dataObjectPaths, (exitCode) => { + if (exitCode === 0) { + this.verifyAndMoveArchive() + .then(() => this.clenaup()) + .then(() => resolve()) + .catch((e) => reject(Error(`Compression task failed: ${e.toString()}`))) + } else { + reject(Error(`Compression task failed: 7z process failed with exit code: ${exitCode || 'null'}`)) + } + }) + }) + } +} + +/** + * Uploads a specified file to S3. + */ +export class UploadArchiveFileTask implements Task { + private _7z: SevenZipService + + constructor( + private archiveFilePath: string, + private objectKey: string, + private uploadsDirectory: string, + private archivesTrackingService: ArchivesTrackingService, + private connectionHandler: IConnectionHandler + ) { + this._7z = new SevenZipService() + } + + public description(): string { + return `Uploading ${this.archiveFilePath} to S3 (key: ${this.objectKey})...` + } + + public async getPackedFiles(): Promise { + const packedFiles = await this._7z.listFiles(this.archiveFilePath) + return packedFiles + } + + public async cleanup(dataObjectIds: string[]): Promise { + const paths = [this.archiveFilePath, ...dataObjectIds.map((id) => path.join(this.uploadsDirectory, id))] + try { + await Promise.all(paths.map((p) => fsp.rm(p, { force: true }))) + } catch (e) { + logger.error(`Upload task cleanup failed: ${e.toString()}`) + } + } + + public async execute(): Promise { + const dataObjectIds = await this.getPackedFiles() + try { + await this.connectionHandler.uploadFileToRemoteBucket(this.objectKey, this.archiveFilePath) + await this.archivesTrackingService.track({ name: this.objectKey, dataObjectIds: dataObjectIds }) + logger.info(`${this.archiveFilePath} successfully uploaded to S3!`) + } catch (e) { + logger.error(`Upload job failed for ${this.archiveFilePath}: ${e.toString()}`) + } + await this.cleanup(dataObjectIds) + } +} diff --git a/storage-node/src/services/archive/tracking.ts b/storage-node/src/services/archive/tracking.ts new file mode 100644 index 0000000000..436e295641 --- /dev/null +++ b/storage-node/src/services/archive/tracking.ts @@ -0,0 +1,162 @@ +import path from 'path' +import { createReadStream, promises as fsp } from 'fs' +import lockfile from 'proper-lockfile' +import readline from 'node:readline/promises' + +export const OBJECTS_TRACKING_FILENAME = 'objects_trackfile' +export const ARCHIVES_TRACKING_FILENAME = 'archives_trackfile.jsonl' + +abstract class TrackfileService { + protected abstract trackfilePath: string + + protected async acquireLock(): Promise<() => Promise> { + return lockfile.lock(this.trackfilePath, { + // Retry timeout formula is: + // Math.min(minTimeout * Math.pow(factor, attempt), maxTimeout) + // Source: https://www.npmjs.com/package/retry + retries: { + minTimeout: 10, + maxTimeout: 100, + factor: 1.5, + retries: 10, + }, + }) + } + + protected async withLock(func: () => Promise): Promise { + const release = await this.acquireLock() + const result = await func() + await release() + return result + } + + protected abstract load(): Promise + public async init(): Promise { + // Create tracking file if it doesn't exist + const fp = await fsp.open(this.trackfilePath, 'a') + await fp.close() + await this.load() + } +} + +type TrackedArchive = { name: string; dataObjectIds: string[] } + +export class ArchivesTrackingService extends TrackfileService { + protected trackfilePath: string + protected trackedArchiveNames: Set | undefined + + constructor(private directory: string) { + super() + this.trackfilePath 
= path.join(this.directory, ARCHIVES_TRACKING_FILENAME) + } + + public getTrackfilePath(): string { + return this.trackfilePath + } + + public getTrackedArchiveNames(): Set { + if (!this.trackedArchiveNames) { + throw new Error('Tracked archives not initialized!') + } + return this.trackedArchiveNames + } + + public isTracked(archiveName: string): boolean { + return this.getTrackedArchiveNames().has(archiveName) + } + + public async track(archive: TrackedArchive): Promise { + if (this.isTracked(archive.name)) { + return + } + await this.withLock(async () => { + await fsp.appendFile(this.trackfilePath, JSON.stringify(archive) + '\n') + this.getTrackedArchiveNames().add(archive.name) + }) + } + + public async load(): Promise { + await this.withLock(async () => { + const rl = readline.createInterface({ input: createReadStream(this.trackfilePath) }) + const trackedArchiveNames = new Set() + for await (const line of rl) { + const trackedArchive: TrackedArchive = JSON.parse(line.trim()) + trackedArchiveNames.add(trackedArchive.name) + } + rl.close() + this.trackedArchiveNames = trackedArchiveNames + }) + } +} + +export class ObjectTrackingService extends TrackfileService { + protected trackfilePath: string + protected trackedObjects: Set | undefined + + constructor(private directory: string) { + super() + this.trackfilePath = path.join(this.directory, OBJECTS_TRACKING_FILENAME) + } + + public getTrackedObjects(): Set { + if (!this.trackedObjects) { + throw new Error('Tracked objects not initialized!') + } + return this.trackedObjects + } + + public isTracked(objectId: string): boolean { + return this.getTrackedObjects().has(objectId) + } + + public async track(dataObjectId: string): Promise { + if (this.isTracked(dataObjectId)) { + return + } + await this.withLock(async () => { + await fsp.appendFile(this.trackfilePath, `${dataObjectId}\n`) + this.getTrackedObjects().add(dataObjectId) + }) + } + + public async untrack(dataObjectId: string): Promise { + await this.withLock(async () => { + await fsp.appendFile(this.trackfilePath, `${dataObjectId} D\n`) + this.getTrackedObjects().delete(dataObjectId) + }) + } + + protected async load(): Promise { + await this.loadTrackedObjects() + const trackedObjects = this.getTrackedObjects() + + // Perform defragmentation of the trackfile + await this.withLock(async () => { + await fsp.rename(this.trackfilePath, `${this.trackfilePath}.old`) + + const fp = await fsp.open(this.trackfilePath, 'w') + for (const dataObjectId of trackedObjects) { + await fp.write(`${dataObjectId}\n`) + } + await fp.close() + await fsp.unlink(`${this.trackfilePath}.old`) + }) + } + + protected async loadTrackedObjects(): Promise { + await this.withLock(async () => { + const rl = readline.createInterface({ input: createReadStream(this.trackfilePath) }) + const trackedObjects = new Set() + for await (const line of rl) { + const [dataObjectId, isDeleted] = line.split(' ') + if (isDeleted) { + trackedObjects.delete(dataObjectId) + } else { + trackedObjects.add(dataObjectId) + } + } + rl.close() + this.trackedObjects = trackedObjects + }) + } +} diff --git a/storage-node/src/services/helpers/moveFile.ts b/storage-node/src/services/helpers/filesystem.ts similarity index 72% rename from storage-node/src/services/helpers/moveFile.ts rename to storage-node/src/services/helpers/filesystem.ts index 3ca58461a5..337086e704 100644 --- a/storage-node/src/services/helpers/moveFile.ts +++ b/storage-node/src/services/helpers/filesystem.ts @@ -1,4 +1,5 @@ import fs from 'fs' +import logger from 
'../logger' const fsPromises = fs.promises /** @@ -19,3 +20,14 @@ export async function moveFile(src: fs.PathLike, dest: fs.PathLike): Promise { + logger.info(`Creating directory ${dirName}`) + await fsPromises.mkdir(dirName, { recursive: true }) +} diff --git a/storage-node/src/services/logger.ts b/storage-node/src/services/logger.ts index 4929b675b1..2cb2d14e5e 100644 --- a/storage-node/src/services/logger.ts +++ b/storage-node/src/services/logger.ts @@ -47,7 +47,9 @@ function createDefaultLoggerOptions(): winston.LoggerOptions { const format = winston.format.combine( winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss:ms' }), winston.format.colorize(), - winston.format.printf((info) => `${info.timestamp} ${info.level}: ${info.message}`) + winston.format.printf( + (info) => `${info.timestamp} ${info.level}: ${info?.label ? `[${info.label}] ` : ''}${info.message}` + ) ) // Redirect all logs to the stderr diff --git a/storage-node/src/services/sync/workingProcess.ts b/storage-node/src/services/processing/workingProcess.ts similarity index 66% rename from storage-node/src/services/sync/workingProcess.ts rename to storage-node/src/services/processing/workingProcess.ts index 1fa6ce88dc..7ff3de8fa6 100644 --- a/storage-node/src/services/sync/workingProcess.ts +++ b/storage-node/src/services/processing/workingProcess.ts @@ -1,6 +1,20 @@ import sleep from 'sleep-promise' -import { SyncTask } from './tasks' -import logger from '../../services/logger' +import logger from '../logger' + +/** + * Defines a task abstraction. + */ +export interface Task { + /** + * Returns human-friendly task description. + */ + description(): string + + /** + * Performs the task. + */ + execute(): Promise +} /** * Defines task destination abstraction. @@ -11,7 +25,7 @@ export interface TaskSink { * * @param tasks tasks to add. */ - add(tasks: SyncTask[]): Promise + add(tasks: Task[]): Promise } /** @@ -23,20 +37,31 @@ export interface TaskSource { * * @returns next task or null if empty. */ - get(): Promise + get(): Promise + + /** + * Allows checking whether the source is currently empty. + * + * @returns Emptiness status + */ + isEmpty(): boolean } /** * Defines pending tasks collections. Implements LIFO semantics. 
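
The move from `SyncTask` to the generic `Task` interface is what lets the archive code reuse the same working-stack machinery for compression and upload jobs. A minimal usage sketch (the `SleepTask` is made up for illustration; the import path assumes the new `services/processing/workingProcess` location):

```ts
import { Task, WorkingStack, TaskProcessorSpawner } from './workingProcess'

// Any unit of work only needs description() and execute().
class SleepTask implements Task {
  constructor(private ms: number) {}
  description(): string {
    return `Sleeping for ${this.ms} ms`
  }
  async execute(): Promise<void> {
    await new Promise((resolve) => setTimeout(resolve, this.ms))
  }
}

async function demo(): Promise<void> {
  const stack = new WorkingStack()
  await stack.add([new SleepTask(100), new SleepTask(200)])
  // With exitOnCompletion left at its default (true) the spawner resolves once
  // the stack is drained; ArchiveService passes false to keep its workers alive.
  const spawner = new TaskProcessorSpawner(stack, 2)
  await spawner.process()
}
```
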
*/ export class WorkingStack implements TaskSink, TaskSource { - workingStack: SyncTask[] + workingStack: Task[] constructor() { this.workingStack = [] } - async get(): Promise { + isEmpty(): boolean { + return !this.workingStack.length + } + + async get(): Promise { const task = this.workingStack.pop() if (task !== undefined) { @@ -46,7 +71,7 @@ export class WorkingStack implements TaskSink, TaskSource { } } - async add(tasks: SyncTask[]): Promise { + async add(tasks: Task[]): Promise { // Avoid using: // this.workingStack.push(...tasks) // When tasks array is very large, javasctipy call stack size might @@ -69,6 +94,7 @@ export class TaskProcessor { taskSource: TaskSource exitOnCompletion: boolean sleepTime: number + isIdle: boolean | null = null constructor(taskSource: TaskSource, exitOnCompletion = true, sleepTime = 3000) { this.taskSource = taskSource @@ -85,9 +111,13 @@ export class TaskProcessor { */ async process(): Promise { while (true) { + // To prevent race condition, set isIdle to null (unknown) until + // async callback is executed after this.taskSource.get() + this.isIdle = null const task = await this.taskSource.get() if (task !== null) { + this.isIdle = false logger.debug(task.description()) try { await task.execute() @@ -96,6 +126,7 @@ export class TaskProcessor { logger.warn(`task failed: ${err.message}`) } } else { + this.isIdle = true if (this.exitOnCompletion) { return } @@ -113,9 +144,23 @@ export class TaskProcessor { export class TaskProcessorSpawner { processNumber: number taskSource: TaskSource - constructor(taskSource: TaskSource, processNumber: number) { + exitOnCompletion: boolean + processors: TaskProcessor[] + + constructor(taskSource: TaskSource, processNumber: number, exitOnCompletion = true) { this.taskSource = taskSource this.processNumber = processNumber + this.exitOnCompletion = exitOnCompletion + this.processors = [] + } + + /** + * Only returns true if: + * - taskSource is empty + * - all processors are idle + */ + get isIdle(): boolean { + return this.taskSource.isEmpty() && this.processors.every((p) => p.isIdle) } /** @@ -124,10 +169,10 @@ export class TaskProcessorSpawner { * @returns empty promise */ async process(): Promise { - const processes = [] - + const processes: Promise[] = [] for (let i = 0; i < this.processNumber; i++) { - const processor = new TaskProcessor(this.taskSource) + const processor = new TaskProcessor(this.taskSource, this.exitOnCompletion) + this.processors.push(processor) processes.push(processor.process()) } diff --git a/storage-node/src/services/queryNode/queries/queries.graphql b/storage-node/src/services/queryNode/queries/queries.graphql index 8f1d1f254e..4e2701730f 100644 --- a/storage-node/src/services/queryNode/queries/queries.graphql +++ b/storage-node/src/services/queryNode/queries/queries.graphql @@ -55,6 +55,7 @@ query getStorageBagDetails($bucketIds: [String!]) { fragment DataObjectByBagIdsDetails on StorageDataObject { id + size ipfsHash storageBag { id diff --git a/storage-node/src/services/runtime/queries.ts b/storage-node/src/services/runtime/queries.ts index 4df57842a0..56d897e507 100644 --- a/storage-node/src/services/runtime/queries.ts +++ b/storage-node/src/services/runtime/queries.ts @@ -29,3 +29,8 @@ export async function getLeadRoleAccount(api: ApiPromise): Promise { + const worker = await api.query.storageWorkingGroup.workerById(workerId) + return worker.isSome +} diff --git a/storage-node/src/services/s3/AwsConnectionHandler.ts b/storage-node/src/services/s3/AwsConnectionHandler.ts new file mode 
100644 index 0000000000..e7a0b7c225 --- /dev/null +++ b/storage-node/src/services/s3/AwsConnectionHandler.ts @@ -0,0 +1,193 @@ +import { IConnectionHandler, UploadFileIfNotExistsOutput, UploadFileOutput } from './IConnectionHandler' +import { + CreateMultipartUploadCommand, + CreateMultipartUploadCommandOutput, + DeleteObjectCommand, + GetObjectCommand, + HeadObjectCommand, + ListObjectsCommand, + ListObjectsCommandInput, + PutObjectCommand, + PutObjectCommandInput, + S3Client, +} from '@aws-sdk/client-s3' +import { getSignedUrl } from '@aws-sdk/s3-request-presigner' +import { fromEnv } from '@aws-sdk/credential-provider-env' +import logger from '../logger' +import fs from 'fs' + +export type AwsConnectionHandlerParams = { + region: string + bucketName: string +} + +export class AwsConnectionHandler implements IConnectionHandler { + private client: S3Client + private bucket: string + + // Official doc at https://docs.aws.amazon.com/AmazonS3/latest/userguide/upload-objects.html: + // Upload an object in a single operation by using the AWS SDKs, REST API, or AWS CLI – With a single PUT operation, you can upload a single object up to 5 GB in size. + private multiPartThresholdGB = 5 + + constructor(opts: AwsConnectionHandlerParams) { + if (process.env.LOCALSTACK_ENABLED === 'true') { + this.client = this.constructWithLocalstack(opts) + } else { + this.client = this.constructProduction(opts) + } + this.bucket = opts.bucketName + logger.info( + `AWS connection handler initialized with bucket config ${ + process.env.LOCALSTACK_ENABLED === 'true' ? 'LOCALSTACK' : 'PRODUCTION' + }` + ) + } + + private constructProduction(opts: AwsConnectionHandlerParams): S3Client { + return new S3Client({ + credentials: fromEnv(), + region: opts.region, + }) + } + + private constructWithLocalstack(opts: AwsConnectionHandlerParams): S3Client { + return new S3Client({ + region: opts.region, + credentials: fromEnv(), + endpoint: process.env.LOCALSTACK_ENDPOINT || 'http://localhost:4566', + tls: false, + forcePathStyle: true, + }) + } + + private isSuccessfulResponse(response: any): boolean { + // Response status code info: https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html + return response.$metadata.httpStatusCode === 200 + } + + private isMultiPartNeeded(filePath: string): boolean { + const stats = fs.statSync(filePath) + const fileSizeInBytes = stats.size + return fileSizeInBytes > this.multiPartThresholdGB * 1_000_000_000 + } + + async uploadFileToRemoteBucket(key: string, filePath: string): Promise { + await this.uploadFileToAWSBucket(key, filePath) + return { + key, + filePath, + } + } + + async uploadFileToRemoteBucketIfNotExists(key: string, filePath: string): Promise { + // check if file exists at key + const fileExists = await this.checkIfFileExists(key) + // if it does, return + if (fileExists) { + return { + key, + filePath, + alreadyExists: true, + } + } + // if it doesn't, upload the file + await this.uploadFileToAWSBucket(key, filePath) + return { + key, + filePath, + alreadyExists: false, + } + } + + private async uploadFileToAWSBucket(filename: string, filePath: string): Promise { + const fileStream = fs.createReadStream(filePath) + + const input: PutObjectCommandInput = { + Bucket: this.bucket, + Key: filename, + Body: fileStream, + } + + // Uploading files to the bucket: multipart + const command = this.isMultiPartNeeded(filePath) + ? 
new CreateMultipartUploadCommand(input) + : new PutObjectCommand(input) + + return await this.client.send(command) + } + + async getRedirectUrlForObject(filename: string): Promise { + const input = { + Bucket: this.bucket, + Key: filename, + } + + const command = new GetObjectCommand(input) + return await getSignedUrl(this.client, command, { expiresIn: 60 * 60 }) + } + + async listFilesOnRemoteBucket(): Promise { + let listingComplete = false + let input: ListObjectsCommandInput = { + Bucket: this.bucket, + } + + const files = [] + + // the listing is paginated so we need to keep track of the marker + while (!listingComplete) { + const response = await this.client.send(new ListObjectsCommand(input)) + if (!this.isSuccessfulResponse(response)) { + throw new Error('Response unsuccessful when listing files in S3 bucket') + } + if (!response.Contents) { + throw new Error('Response contents are undefined when listing files in S3 bucket, bucket possibly empty') + } + files.push(...response.Contents.filter((file) => file.Key).map((file) => file.Key!)) + listingComplete = !response.IsTruncated + input = { + Bucket: this.bucket, + Marker: files[files.length - 1], // https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/Package/-aws-sdk-client-s3/Interface/ListObjectsCommandOutput/ + } + } + + return files + } + + async removeObject(filename: string): Promise { + const input = { + Bucket: this.bucket, + Key: filename, + } + + await this.client.send(new DeleteObjectCommand(input)) + } + + private async checkIfFileExists(filename: string): Promise { + const input = { + Bucket: this.bucket, + Key: filename, + } + + const command = new HeadObjectCommand(input) + try { + await this.client.send(command) + return true + } catch (error) { + if (error.$metadata && error.$metadata.httpStatusCode) { + switch (error.$metadata.httpStatusCode) { + case 404: + return false + case 403: + throw new Error('Insufficient permissions to check if file exists in S3 bucket') + default: + throw new Error( + `Unknown error when checking if file exists in S3 bucket: error ${error.$metadata.httpStatusCode}` + ) + } + } else { + throw new Error('Unexpected error format when checking if file exists in S3 bucket') + } + } + } +} diff --git a/storage-node/src/services/s3/IConnectionHandler.ts b/storage-node/src/services/s3/IConnectionHandler.ts new file mode 100644 index 0000000000..a98a80206c --- /dev/null +++ b/storage-node/src/services/s3/IConnectionHandler.ts @@ -0,0 +1,60 @@ +export type UploadFileOutput = { + key: string + filePath: string +} + +export type UploadFileIfNotExistsOutput = { + key: string + filePath: string + alreadyExists: boolean +} +/** + * Represents a connection handler for interacting with a remote storage unit. + * The storage unit can be a bucket in S3, a container in Azure Blob Storage, or similar concepts in other cloud storage services. + * Within this storage unit, objects are organized using keys. A key is a string that defines the location of an object + * within the storage unit. Keys use the format "/" with "/" as a delimiter to separate directories. + */ +export interface IConnectionHandler { + /** + * Asynchronously uploads an object to the storage unit. It doesn't check if the object already exists. + * @param key - The key of the object in the storage unit. + * @param filePath - The local file path of the object to upload. + * @returns A promise that resolves when the upload is complete or rejects with an error. 
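
Because `ArchiveService` and the upload tasks depend only on `IConnectionHandler` (whose full contract follows below), the S3 backend can in principle be swapped for something else, e.g. a local-directory stub in tests. The sketch below is purely illustrative and assumes the interface members and output types exactly as shown in this patch:

```ts
import { promises as fsp } from 'fs'
import path from 'path'
import { IConnectionHandler, UploadFileOutput, UploadFileIfNotExistsOutput } from './IConnectionHandler'

// Hypothetical local-filesystem stand-in for AwsConnectionHandler (tests/dev only).
export class LocalDirConnectionHandler implements IConnectionHandler {
  constructor(private rootDir: string) {}

  async uploadFileToRemoteBucket(key: string, filePath: string): Promise<UploadFileOutput> {
    await fsp.copyFile(filePath, path.join(this.rootDir, key))
    return { key, filePath }
  }

  async uploadFileToRemoteBucketIfNotExists(key: string, filePath: string): Promise<UploadFileIfNotExistsOutput> {
    const target = path.join(this.rootDir, key)
    const alreadyExists = await fsp
      .access(target)
      .then(() => true)
      .catch(() => false)
    if (!alreadyExists) {
      await fsp.copyFile(filePath, target)
    }
    return { key, filePath, alreadyExists }
  }

  async getRedirectUrlForObject(key: string): Promise<string> {
    return `file://${path.join(this.rootDir, key)}`
  }

  async listFilesOnRemoteBucket(): Promise<string[]> {
    return fsp.readdir(this.rootDir)
  }

  async removeObject(key: string): Promise<void> {
    await fsp.rm(path.join(this.rootDir, key), { force: true })
  }
}
```
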
+ */ + uploadFileToRemoteBucket(key: string, filePath: string): Promise + + /** + * Asynchronously uploads an object to the storage unit if it does not exist. + * @param key - The key of the object in the storage unit. + * @param filePath - The local file path of the object to upload. + * @returns A promise that resolves when the upload is complete or rejects with an error. + */ + uploadFileToRemoteBucketIfNotExists(key: string, filePath: string): Promise + + /** + * Asynchronously retrieves a presigned URL for an object in the storage unit. + * @param key - The key of the object in the storage unit. + * @returns A promise that resolves with the presigned URL of the object (1h expiry) or rejects with an error. + */ + getRedirectUrlForObject(key: string): Promise + + // /** + // * Asynchronously retrieves an URL for an object in the storage unit. + // * @param key - The key of the object in the storage unit. + // * @returns A promise that resolves with the URL of the object or rejects with an error. + // */ + // getUrlForObject(key: string): Promise + + /** + * Asynchronously lists ALL objects in the storage unit. To be used during cache initialization only as it can be very slow. + * @returns A promise that resolves with an array of object keys or rejects with an error. + */ + listFilesOnRemoteBucket(): Promise + + /** + * Asynchronously removes an object from the storage unit. + * @param key - The key of the object to remove from the storage unit. + * @returns A promise that resolves when the removal is complete or rejects with an error. + */ + removeObject(key: string): Promise +} diff --git a/storage-node/src/services/sync/acceptPendingObjects.ts b/storage-node/src/services/sync/acceptPendingObjects.ts index cc3a74e67e..170498688d 100644 --- a/storage-node/src/services/sync/acceptPendingObjects.ts +++ b/storage-node/src/services/sync/acceptPendingObjects.ts @@ -6,7 +6,7 @@ import path from 'path' import { addDataObjectIdToCache } from '../caching/localDataObjects' import { registerNewDataObjectId } from '../caching/newUploads' import { hashFile } from '../helpers/hashing' -import { moveFile } from '../helpers/moveFile' +import { moveFile } from '../helpers/filesystem' import logger from '../logger' import { QueryNodeApi } from '../queryNode/api' import { acceptPendingDataObjectsBatch } from '../runtime/extrinsics' diff --git a/storage-node/src/services/sync/cleanupService.ts b/storage-node/src/services/sync/cleanupService.ts index f1318456c5..bfc99e54e6 100644 --- a/storage-node/src/services/sync/cleanupService.ts +++ b/storage-node/src/services/sync/cleanupService.ts @@ -8,7 +8,7 @@ import { QueryNodeApi } from '../queryNode/api' import { DataObjectDetailsFragment } from '../queryNode/generated/queries' import { DataObligations, getDataObjectsByIDs, getStorageObligationsFromRuntime } from './storageObligations' import { DeleteLocalFileTask } from './tasks' -import { TaskProcessorSpawner, WorkingStack } from './workingProcess' +import { TaskProcessorSpawner, WorkingStack } from '../processing/workingProcess' /** * The maximum allowed threshold by which the QN processor can lag behind diff --git a/storage-node/src/services/sync/storageObligations.ts b/storage-node/src/services/sync/storageObligations.ts index 86fa98ec15..b8bc4e7a4f 100644 --- a/storage-node/src/services/sync/storageObligations.ts +++ b/storage-node/src/services/sync/storageObligations.ts @@ -7,6 +7,8 @@ import { StorageBagDetailsFragment, StorageBucketDetailsFragment, } from '../queryNode/generated/queries' +import { 
ApiPromise } from '@polkadot/api' +import { PalletStorageStorageBucketRecord } from '@polkadot/types/lookup' /** * Defines storage provider data obligations. @@ -66,7 +68,7 @@ type Bag = { /** * Data object abstraction. */ -type DataObject = { +export type DataObject = { /** * Data object ID */ @@ -81,6 +83,11 @@ type DataObject = { * Data Object hash */ ipfsHash: string + + /** + * Data Object size + */ + size: number } /** @@ -114,6 +121,7 @@ export async function getStorageObligationsFromRuntime( })), dataObjects: assignedDataObjects.map((dataObject) => ({ id: dataObject.id, + size: parseInt(dataObject.size), bagId: dataObject.storageBag.id, ipfsHash: dataObject.ipfsHash, })), @@ -239,3 +247,32 @@ async function getAllObjectsWithPaging( return result } + +/** + * Given a list of bucket ids, constructs a list of [bucketId, operatorAddress] entries. + * Filters out buckets that are not assigned to the provided workerId. + * + * @param api - runtime API + * @param qnApi - query node API + * @param workerId - ID of the worker + * @param bucketsToServe - list of buckets to serve / construct the mapping for + * @returns [bucketId, operatorAddress] entries + */ +export async function constructBucketToAddressMapping( + api: ApiPromise, + qnApi: QueryNodeApi, + workerId: number, + bucketsToServe: number[] +): Promise<[string, string][]> { + const bucketIds = await getStorageBucketIdsByWorkerId(qnApi, workerId) + const buckets: [string, PalletStorageStorageBucketRecord][] = ( + await Promise.all( + bucketIds.map(async (bucketId) => [bucketId, await api.query.storage.storageBucketById(bucketId)] as const) + ) + ) + .filter(([bucketId]) => bucketsToServe.length === 0 || bucketsToServe.includes(parseInt(bucketId))) + .filter(([, optBucket]) => optBucket.isSome && optBucket.unwrap().operatorStatus.isStorageWorker) + .map(([bucketId, optBucket]) => [bucketId, optBucket.unwrap()]) + + return buckets.map(([bucketId, bucket]) => [bucketId, bucket.operatorStatus.asStorageWorker[1].toString()]) +} diff --git a/storage-node/src/services/sync/synchronizer.ts b/storage-node/src/services/sync/synchronizer.ts index a3298779f7..a4a7f0a409 100644 --- a/storage-node/src/services/sync/synchronizer.ts +++ b/storage-node/src/services/sync/synchronizer.ts @@ -3,7 +3,7 @@ import logger from '../../services/logger' import { QueryNodeApi } from '../queryNode/api' import { DataObligations, getStorageObligationsFromRuntime } from './storageObligations' import { DownloadFileTask } from './tasks' -import { TaskProcessorSpawner, WorkingStack } from './workingProcess' +import { TaskProcessorSpawner, WorkingStack } from '../processing/workingProcess' import _ from 'lodash' /** @@ -93,7 +93,7 @@ export async function performSync( * @param hostId - Random host UUID assigned to each node during bootstrap * @param selectedOperatorUrl - operator URL selected for syncing objects */ -async function getDownloadTasks( +export async function getDownloadTasks( dataObligations: DataObligations, ownBuckets: string[], added: DataObligations['dataObjects'], @@ -156,6 +156,7 @@ async function getDownloadTasks( selectedOperatorUrl ? 
[selectedOperatorUrl] : operatorUrls, dataObject.id, dataObject.ipfsHash, + dataObject.size, uploadDirectory, tempDirectory, asyncWorkersTimeout, diff --git a/storage-node/src/services/sync/tasks.ts b/storage-node/src/services/sync/tasks.ts index 0abbbb0f6d..a359bd754b 100644 --- a/storage-node/src/services/sync/tasks.ts +++ b/storage-node/src/services/sync/tasks.ts @@ -7,35 +7,23 @@ import urljoin from 'url-join' import { promisify } from 'util' import { v4 as uuidv4 } from 'uuid' import logger from '../../services/logger' -import { - addDataObjectIdToCache, - deleteDataObjectIdFromCache, - getDataObjectIdFromCache, -} from '../caching/localDataObjects' +import { deleteDataObjectIdFromCache, getDataObjectIdFromCache } from '../caching/localDataObjects' import { isNewDataObject } from '../caching/newUploads' import { hashFile } from '../helpers/hashing' -import { moveFile } from '../helpers/moveFile' +import { moveFile } from '../helpers/filesystem' +import { Task } from '../processing/workingProcess' +import { EventEmitter } from 'node:events' const fsPromises = fs.promises -/** - * Defines syncronization task abstraction. - */ -export interface SyncTask { - /** - * Returns human-friendly task description. - */ - description(): string - - /** - * Performs the task. - */ - execute(): Promise -} +export const downloadEvents = new EventEmitter<{ + 'success': [string, number] + 'fail': [string, number] +}>() /** * Deletes the file in the local storage by its name. */ -export class DeleteLocalFileTask implements SyncTask { +export class DeleteLocalFileTask implements Task { uploadsDirectory: string filename: string @@ -72,13 +60,14 @@ export class DeleteLocalFileTask implements SyncTask { /** * Download the file from the remote storage node to the local storage. */ -export class DownloadFileTask implements SyncTask { +export class DownloadFileTask implements Task { operatorUrls: string[] constructor( baseUrls: string[], private dataObjectId: string, private expectedHash: string, + private expectedSize: number, private uploadsDirectory: string, private tempDirectory: string, private downloadTimeout: number, @@ -106,14 +95,18 @@ export class DownloadFileTask implements SyncTask { const filepath = path.join(this.uploadsDirectory, this.dataObjectId) try { // Try downloading file - await this.tryDownload(chosenBaseUrl, filepath) - - // if download succeeds, break the loop - try { - await fsPromises.access(filepath, fs.constants.F_OK) - return - } catch (err) { - continue + const tempFilePath = await this.tryDownload(chosenBaseUrl) + // If download succeeds, try to move the file to uploads directory + if (tempFilePath) { + try { + await moveFile(tempFilePath, filepath) + await fsPromises.access(filepath, fs.constants.F_OK) + downloadEvents.emit('success', this.dataObjectId, this.expectedSize) + return + } catch (err) { + logger.error(`Sync - error trying to move file ${tempFilePath} to ${filepath}: ${err.toString()}`) + continue + } } } catch (err) { logger.error(`Sync - fetching data error for ${this.dataObjectId}: ${err}`, { err }) @@ -121,9 +114,10 @@ export class DownloadFileTask implements SyncTask { } logger.warn(`Sync - Failed to download ${this.dataObjectId}`) + downloadEvents.emit('fail', this.dataObjectId, this.expectedSize) } - async tryDownload(url: string, filepath: string): Promise { + async tryDownload(url: string): Promise { const streamPipeline = promisify(pipeline) // We create tempfile first to mitigate partial downloads on app (or remote node) crash. 
// This partial downloads will be cleaned up during the next sync iteration. @@ -153,8 +147,7 @@ export class DownloadFileTask implements SyncTask { await streamPipeline(request, fileStream) await this.verifyDownloadedFile(tempFilePath) - await moveFile(tempFilePath, filepath) - addDataObjectIdToCache(this.dataObjectId) + return tempFilePath } catch (err) { logger.warn(`Sync - fetching data error for ${url}: ${err}`, { err }) try { diff --git a/storage-node/src/services/webApi/controllers/filesApi.ts b/storage-node/src/services/webApi/controllers/filesApi.ts index 99a4cbb8e4..ffe06fcf3b 100644 --- a/storage-node/src/services/webApi/controllers/filesApi.ts +++ b/storage-node/src/services/webApi/controllers/filesApi.ts @@ -12,7 +12,7 @@ import { pinDataObjectIdToCache, unpinDataObjectIdFromCache } from '../../cachin import { parseBagId } from '../../helpers/bagTypes' import { getFileInfo, FileInfo } from '../../helpers/fileInfo' import { hashFile } from '../../helpers/hashing' -import { moveFile } from '../../helpers/moveFile' +import { moveFile } from '../../helpers/filesystem' import logger from '../../logger' import { getStorageBucketIdsByWorkerId } from '../../sync/storageObligations' import { GetFileHeadersRequestParams, GetFileRequestParams, UploadFileQueryParams } from '../types' diff --git a/yarn.lock b/yarn.lock index 2cd0b2a2ac..a48b2f4120 100644 --- a/yarn.lock +++ b/yarn.lock @@ -196,6 +196,638 @@ resolved "https://registry.npmjs.org/@arr/every/-/every-1.0.1.tgz" integrity sha512-UQFQ6SgyJ6LX42W8rHCs8KVc0JS0tzVL9ct4XYedJukskYVWTo49tNiMEK9C2HTyarbNiT/RVIRSY82vH+6sTg== +"@aws-crypto/crc32@5.2.0": + version "5.2.0" + resolved "https://registry.yarnpkg.com/@aws-crypto/crc32/-/crc32-5.2.0.tgz#cfcc22570949c98c6689cfcbd2d693d36cdae2e1" + integrity sha512-nLbCWqQNgUiwwtFsen1AdzAtvuLRsQS8rYgMuxCrdKf9kOssamGLuPwyTY9wyYblNr9+1XM8v6zoDTPPSIeANg== + dependencies: + "@aws-crypto/util" "^5.2.0" + "@aws-sdk/types" "^3.222.0" + tslib "^2.6.2" + +"@aws-crypto/crc32c@5.2.0": + version "5.2.0" + resolved "https://registry.yarnpkg.com/@aws-crypto/crc32c/-/crc32c-5.2.0.tgz#4e34aab7f419307821509a98b9b08e84e0c1917e" + integrity sha512-+iWb8qaHLYKrNvGRbiYRHSdKRWhto5XlZUEBwDjYNf+ly5SVYG6zEoYIdxvf5R3zyeP16w4PLBn3rH1xc74Rag== + dependencies: + "@aws-crypto/util" "^5.2.0" + "@aws-sdk/types" "^3.222.0" + tslib "^2.6.2" + +"@aws-crypto/sha1-browser@5.2.0": + version "5.2.0" + resolved "https://registry.yarnpkg.com/@aws-crypto/sha1-browser/-/sha1-browser-5.2.0.tgz#b0ee2d2821d3861f017e965ef3b4cb38e3b6a0f4" + integrity sha512-OH6lveCFfcDjX4dbAvCFSYUjJZjDr/3XJ3xHtjn3Oj5b9RjojQo8npoLeA/bNwkOkrSQ0wgrHzXk4tDRxGKJeg== + dependencies: + "@aws-crypto/supports-web-crypto" "^5.2.0" + "@aws-crypto/util" "^5.2.0" + "@aws-sdk/types" "^3.222.0" + "@aws-sdk/util-locate-window" "^3.0.0" + "@smithy/util-utf8" "^2.0.0" + tslib "^2.6.2" + +"@aws-crypto/sha256-browser@5.2.0": + version "5.2.0" + resolved "https://registry.yarnpkg.com/@aws-crypto/sha256-browser/-/sha256-browser-5.2.0.tgz#153895ef1dba6f9fce38af550e0ef58988eb649e" + integrity sha512-AXfN/lGotSQwu6HNcEsIASo7kWXZ5HYWvfOmSNKDsEqC4OashTp8alTmaz+F7TC2L083SFv5RdB+qU3Vs1kZqw== + dependencies: + "@aws-crypto/sha256-js" "^5.2.0" + "@aws-crypto/supports-web-crypto" "^5.2.0" + "@aws-crypto/util" "^5.2.0" + "@aws-sdk/types" "^3.222.0" + "@aws-sdk/util-locate-window" "^3.0.0" + "@smithy/util-utf8" "^2.0.0" + tslib "^2.6.2" + +"@aws-crypto/sha256-js@5.2.0", "@aws-crypto/sha256-js@^5.2.0": + version "5.2.0" + resolved 
"https://registry.yarnpkg.com/@aws-crypto/sha256-js/-/sha256-js-5.2.0.tgz#c4fdb773fdbed9a664fc1a95724e206cf3860042" + integrity sha512-FFQQyu7edu4ufvIZ+OadFpHHOt+eSTBaYaki44c+akjg7qZg9oOQeLlk77F6tSYqjDAFClrHJk9tMf0HdVyOvA== + dependencies: + "@aws-crypto/util" "^5.2.0" + "@aws-sdk/types" "^3.222.0" + tslib "^2.6.2" + +"@aws-crypto/supports-web-crypto@^5.2.0": + version "5.2.0" + resolved "https://registry.yarnpkg.com/@aws-crypto/supports-web-crypto/-/supports-web-crypto-5.2.0.tgz#a1e399af29269be08e695109aa15da0a07b5b5fb" + integrity sha512-iAvUotm021kM33eCdNfwIN//F77/IADDSs58i+MDaOqFrVjZo9bAal0NK7HurRuWLLpF1iLX7gbWrjHjeo+YFg== + dependencies: + tslib "^2.6.2" + +"@aws-crypto/util@^5.2.0": + version "5.2.0" + resolved "https://registry.yarnpkg.com/@aws-crypto/util/-/util-5.2.0.tgz#71284c9cffe7927ddadac793c14f14886d3876da" + integrity sha512-4RkU9EsI6ZpBve5fseQlGNUWKMa1RLPQ1dnjnQoe07ldfIzcsGb5hC5W0Dm7u423KWzawlrpbjXBrXCEv9zazQ== + dependencies: + "@aws-sdk/types" "^3.222.0" + "@smithy/util-utf8" "^2.0.0" + tslib "^2.6.2" + +"@aws-sdk/client-s3@^3.675.0": + version "3.675.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/client-s3/-/client-s3-3.675.0.tgz#1588a70afec26be3cc9a7577fa3f37d768951222" + integrity sha512-WKPc9fwFsD0SrWmrj0MdMHE+hQ0YAIGLqACmTnL1yW76qAwjIlFa9TAhR8f29aVCQodt/I6HDf9dHX/F+GyDFg== + dependencies: + "@aws-crypto/sha1-browser" "5.2.0" + "@aws-crypto/sha256-browser" "5.2.0" + "@aws-crypto/sha256-js" "5.2.0" + "@aws-sdk/client-sso-oidc" "3.675.0" + "@aws-sdk/client-sts" "3.675.0" + "@aws-sdk/core" "3.667.0" + "@aws-sdk/credential-provider-node" "3.675.0" + "@aws-sdk/middleware-bucket-endpoint" "3.667.0" + "@aws-sdk/middleware-expect-continue" "3.667.0" + "@aws-sdk/middleware-flexible-checksums" "3.669.0" + "@aws-sdk/middleware-host-header" "3.667.0" + "@aws-sdk/middleware-location-constraint" "3.667.0" + "@aws-sdk/middleware-logger" "3.667.0" + "@aws-sdk/middleware-recursion-detection" "3.667.0" + "@aws-sdk/middleware-sdk-s3" "3.674.0" + "@aws-sdk/middleware-ssec" "3.667.0" + "@aws-sdk/middleware-user-agent" "3.669.0" + "@aws-sdk/region-config-resolver" "3.667.0" + "@aws-sdk/signature-v4-multi-region" "3.674.0" + "@aws-sdk/types" "3.667.0" + "@aws-sdk/util-endpoints" "3.667.0" + "@aws-sdk/util-user-agent-browser" "3.675.0" + "@aws-sdk/util-user-agent-node" "3.669.0" + "@aws-sdk/xml-builder" "3.662.0" + "@smithy/config-resolver" "^3.0.9" + "@smithy/core" "^2.4.8" + "@smithy/eventstream-serde-browser" "^3.0.10" + "@smithy/eventstream-serde-config-resolver" "^3.0.7" + "@smithy/eventstream-serde-node" "^3.0.9" + "@smithy/fetch-http-handler" "^3.2.9" + "@smithy/hash-blob-browser" "^3.1.6" + "@smithy/hash-node" "^3.0.7" + "@smithy/hash-stream-node" "^3.1.6" + "@smithy/invalid-dependency" "^3.0.7" + "@smithy/md5-js" "^3.0.7" + "@smithy/middleware-content-length" "^3.0.9" + "@smithy/middleware-endpoint" "^3.1.4" + "@smithy/middleware-retry" "^3.0.23" + "@smithy/middleware-serde" "^3.0.7" + "@smithy/middleware-stack" "^3.0.7" + "@smithy/node-config-provider" "^3.1.8" + "@smithy/node-http-handler" "^3.2.4" + "@smithy/protocol-http" "^4.1.4" + "@smithy/smithy-client" "^3.4.0" + "@smithy/types" "^3.5.0" + "@smithy/url-parser" "^3.0.7" + "@smithy/util-base64" "^3.0.0" + "@smithy/util-body-length-browser" "^3.0.0" + "@smithy/util-body-length-node" "^3.0.0" + "@smithy/util-defaults-mode-browser" "^3.0.23" + "@smithy/util-defaults-mode-node" "^3.0.23" + "@smithy/util-endpoints" "^2.1.3" + "@smithy/util-middleware" "^3.0.7" + "@smithy/util-retry" "^3.0.7" + "@smithy/util-stream" "^3.1.9" + 
"@smithy/util-utf8" "^3.0.0" + "@smithy/util-waiter" "^3.1.6" + tslib "^2.6.2" + +"@aws-sdk/client-sso-oidc@3.675.0": + version "3.675.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/client-sso-oidc/-/client-sso-oidc-3.675.0.tgz#a30650a462afcf0386adb26e99283d4989b9bbf4" + integrity sha512-4kEcaa2P/BFz+xy5tagbtzM08gbjHXyYqW+n6SJuUFK7N6bZNnA4cu1hVgHcqOqk8Dbwv7fiseGT0x3Hhqjwqg== + dependencies: + "@aws-crypto/sha256-browser" "5.2.0" + "@aws-crypto/sha256-js" "5.2.0" + "@aws-sdk/core" "3.667.0" + "@aws-sdk/credential-provider-node" "3.675.0" + "@aws-sdk/middleware-host-header" "3.667.0" + "@aws-sdk/middleware-logger" "3.667.0" + "@aws-sdk/middleware-recursion-detection" "3.667.0" + "@aws-sdk/middleware-user-agent" "3.669.0" + "@aws-sdk/region-config-resolver" "3.667.0" + "@aws-sdk/types" "3.667.0" + "@aws-sdk/util-endpoints" "3.667.0" + "@aws-sdk/util-user-agent-browser" "3.675.0" + "@aws-sdk/util-user-agent-node" "3.669.0" + "@smithy/config-resolver" "^3.0.9" + "@smithy/core" "^2.4.8" + "@smithy/fetch-http-handler" "^3.2.9" + "@smithy/hash-node" "^3.0.7" + "@smithy/invalid-dependency" "^3.0.7" + "@smithy/middleware-content-length" "^3.0.9" + "@smithy/middleware-endpoint" "^3.1.4" + "@smithy/middleware-retry" "^3.0.23" + "@smithy/middleware-serde" "^3.0.7" + "@smithy/middleware-stack" "^3.0.7" + "@smithy/node-config-provider" "^3.1.8" + "@smithy/node-http-handler" "^3.2.4" + "@smithy/protocol-http" "^4.1.4" + "@smithy/smithy-client" "^3.4.0" + "@smithy/types" "^3.5.0" + "@smithy/url-parser" "^3.0.7" + "@smithy/util-base64" "^3.0.0" + "@smithy/util-body-length-browser" "^3.0.0" + "@smithy/util-body-length-node" "^3.0.0" + "@smithy/util-defaults-mode-browser" "^3.0.23" + "@smithy/util-defaults-mode-node" "^3.0.23" + "@smithy/util-endpoints" "^2.1.3" + "@smithy/util-middleware" "^3.0.7" + "@smithy/util-retry" "^3.0.7" + "@smithy/util-utf8" "^3.0.0" + tslib "^2.6.2" + +"@aws-sdk/client-sso@3.675.0": + version "3.675.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/client-sso/-/client-sso-3.675.0.tgz#4e400ef0141ee2e19b64c9948af7a27697a3f0cc" + integrity sha512-2goBCEr4acZJ1YJ69eWPTsIfZUbO7enog+lBA5kZShDiwovqzwYSHSlf6OGz4ETs2xT1n7n+QfKY0p+TluTfEw== + dependencies: + "@aws-crypto/sha256-browser" "5.2.0" + "@aws-crypto/sha256-js" "5.2.0" + "@aws-sdk/core" "3.667.0" + "@aws-sdk/middleware-host-header" "3.667.0" + "@aws-sdk/middleware-logger" "3.667.0" + "@aws-sdk/middleware-recursion-detection" "3.667.0" + "@aws-sdk/middleware-user-agent" "3.669.0" + "@aws-sdk/region-config-resolver" "3.667.0" + "@aws-sdk/types" "3.667.0" + "@aws-sdk/util-endpoints" "3.667.0" + "@aws-sdk/util-user-agent-browser" "3.675.0" + "@aws-sdk/util-user-agent-node" "3.669.0" + "@smithy/config-resolver" "^3.0.9" + "@smithy/core" "^2.4.8" + "@smithy/fetch-http-handler" "^3.2.9" + "@smithy/hash-node" "^3.0.7" + "@smithy/invalid-dependency" "^3.0.7" + "@smithy/middleware-content-length" "^3.0.9" + "@smithy/middleware-endpoint" "^3.1.4" + "@smithy/middleware-retry" "^3.0.23" + "@smithy/middleware-serde" "^3.0.7" + "@smithy/middleware-stack" "^3.0.7" + "@smithy/node-config-provider" "^3.1.8" + "@smithy/node-http-handler" "^3.2.4" + "@smithy/protocol-http" "^4.1.4" + "@smithy/smithy-client" "^3.4.0" + "@smithy/types" "^3.5.0" + "@smithy/url-parser" "^3.0.7" + "@smithy/util-base64" "^3.0.0" + "@smithy/util-body-length-browser" "^3.0.0" + "@smithy/util-body-length-node" "^3.0.0" + "@smithy/util-defaults-mode-browser" "^3.0.23" + "@smithy/util-defaults-mode-node" "^3.0.23" + "@smithy/util-endpoints" "^2.1.3" + 
"@smithy/util-middleware" "^3.0.7" + "@smithy/util-retry" "^3.0.7" + "@smithy/util-utf8" "^3.0.0" + tslib "^2.6.2" + +"@aws-sdk/client-sts@3.675.0": + version "3.675.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/client-sts/-/client-sts-3.675.0.tgz#8efcff1270d1f10e7dafa469f88fb71dcfd70178" + integrity sha512-zgjyR4GyuONeDGJBKNt9lFJ8HfDX7rpxZZVR7LSXr9lUkjf6vUGgD2k/K4UAoOTWCKKCor6TA562ezGlA8su6Q== + dependencies: + "@aws-crypto/sha256-browser" "5.2.0" + "@aws-crypto/sha256-js" "5.2.0" + "@aws-sdk/client-sso-oidc" "3.675.0" + "@aws-sdk/core" "3.667.0" + "@aws-sdk/credential-provider-node" "3.675.0" + "@aws-sdk/middleware-host-header" "3.667.0" + "@aws-sdk/middleware-logger" "3.667.0" + "@aws-sdk/middleware-recursion-detection" "3.667.0" + "@aws-sdk/middleware-user-agent" "3.669.0" + "@aws-sdk/region-config-resolver" "3.667.0" + "@aws-sdk/types" "3.667.0" + "@aws-sdk/util-endpoints" "3.667.0" + "@aws-sdk/util-user-agent-browser" "3.675.0" + "@aws-sdk/util-user-agent-node" "3.669.0" + "@smithy/config-resolver" "^3.0.9" + "@smithy/core" "^2.4.8" + "@smithy/fetch-http-handler" "^3.2.9" + "@smithy/hash-node" "^3.0.7" + "@smithy/invalid-dependency" "^3.0.7" + "@smithy/middleware-content-length" "^3.0.9" + "@smithy/middleware-endpoint" "^3.1.4" + "@smithy/middleware-retry" "^3.0.23" + "@smithy/middleware-serde" "^3.0.7" + "@smithy/middleware-stack" "^3.0.7" + "@smithy/node-config-provider" "^3.1.8" + "@smithy/node-http-handler" "^3.2.4" + "@smithy/protocol-http" "^4.1.4" + "@smithy/smithy-client" "^3.4.0" + "@smithy/types" "^3.5.0" + "@smithy/url-parser" "^3.0.7" + "@smithy/util-base64" "^3.0.0" + "@smithy/util-body-length-browser" "^3.0.0" + "@smithy/util-body-length-node" "^3.0.0" + "@smithy/util-defaults-mode-browser" "^3.0.23" + "@smithy/util-defaults-mode-node" "^3.0.23" + "@smithy/util-endpoints" "^2.1.3" + "@smithy/util-middleware" "^3.0.7" + "@smithy/util-retry" "^3.0.7" + "@smithy/util-utf8" "^3.0.0" + tslib "^2.6.2" + +"@aws-sdk/core@3.667.0": + version "3.667.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/core/-/core-3.667.0.tgz#ecf93bf8e3ebea3bd972576a67b87dd291d7a90a" + integrity sha512-pMcDVI7Tmdsc8R3sDv0Omj/4iRParGY+uJtAfF669WnZfDfaBQaix2Mq7+Mu08vdjqO9K3gicFvjk9S1VLmOKA== + dependencies: + "@aws-sdk/types" "3.667.0" + "@smithy/core" "^2.4.8" + "@smithy/node-config-provider" "^3.1.8" + "@smithy/property-provider" "^3.1.7" + "@smithy/protocol-http" "^4.1.4" + "@smithy/signature-v4" "^4.2.0" + "@smithy/smithy-client" "^3.4.0" + "@smithy/types" "^3.5.0" + "@smithy/util-middleware" "^3.0.7" + fast-xml-parser "4.4.1" + tslib "^2.6.2" + +"@aws-sdk/credential-provider-env@3.667.0": + version "3.667.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/credential-provider-env/-/credential-provider-env-3.667.0.tgz#1b3a4b049fc164a3a3eb3617f7448fed3cb3a2db" + integrity sha512-zZbrkkaPc54WXm+QAnpuv0LPNfsts0HPPd+oCECGs7IQRaFsGj187cwvPg9RMWDFZqpm64MdBDoA8OQHsqzYCw== + dependencies: + "@aws-sdk/core" "3.667.0" + "@aws-sdk/types" "3.667.0" + "@smithy/property-provider" "^3.1.7" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/credential-provider-http@3.667.0": + version "3.667.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/credential-provider-http/-/credential-provider-http-3.667.0.tgz#ff78b7f76715a7456976930bff6221dfac70afbc" + integrity sha512-sjtybFfERZWiqTY7fswBxKQLvUkiCucOWyqh3IaPo/4nE1PXRnaZCVG0+kRBPrYIxWqiVwytvZzMJy8sVZcG0A== + dependencies: + "@aws-sdk/core" "3.667.0" + "@aws-sdk/types" "3.667.0" + "@smithy/fetch-http-handler" "^3.2.9" + "@smithy/node-http-handler" 
"^3.2.4" + "@smithy/property-provider" "^3.1.7" + "@smithy/protocol-http" "^4.1.4" + "@smithy/smithy-client" "^3.4.0" + "@smithy/types" "^3.5.0" + "@smithy/util-stream" "^3.1.9" + tslib "^2.6.2" + +"@aws-sdk/credential-provider-ini@3.675.0": + version "3.675.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/credential-provider-ini/-/credential-provider-ini-3.675.0.tgz#031b75d26ab8e2921c8945a905f6ca7c2005e15e" + integrity sha512-kCBlC6grpbpCvgowk9T4JHZxJ88VfN0r77bDZClcadFRAKQ8UHyO02zhgFCfUdnU1lNv1mr3ngEcGN7XzJlYWA== + dependencies: + "@aws-sdk/core" "3.667.0" + "@aws-sdk/credential-provider-env" "3.667.0" + "@aws-sdk/credential-provider-http" "3.667.0" + "@aws-sdk/credential-provider-process" "3.667.0" + "@aws-sdk/credential-provider-sso" "3.675.0" + "@aws-sdk/credential-provider-web-identity" "3.667.0" + "@aws-sdk/types" "3.667.0" + "@smithy/credential-provider-imds" "^3.2.4" + "@smithy/property-provider" "^3.1.7" + "@smithy/shared-ini-file-loader" "^3.1.8" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/credential-provider-node@3.675.0": + version "3.675.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/credential-provider-node/-/credential-provider-node-3.675.0.tgz#25ebe731279dbc1f165e2fb5f7648bae43b7c693" + integrity sha512-VO1WVZCDmAYu4sY/6qIBzdm5vJTxLhWKJWvL5kVFfSe8WiNNoHlTqYYUK9vAm/JYpIgFLTefPbIc5W4MK7o6Pg== + dependencies: + "@aws-sdk/credential-provider-env" "3.667.0" + "@aws-sdk/credential-provider-http" "3.667.0" + "@aws-sdk/credential-provider-ini" "3.675.0" + "@aws-sdk/credential-provider-process" "3.667.0" + "@aws-sdk/credential-provider-sso" "3.675.0" + "@aws-sdk/credential-provider-web-identity" "3.667.0" + "@aws-sdk/types" "3.667.0" + "@smithy/credential-provider-imds" "^3.2.4" + "@smithy/property-provider" "^3.1.7" + "@smithy/shared-ini-file-loader" "^3.1.8" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/credential-provider-process@3.667.0": + version "3.667.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/credential-provider-process/-/credential-provider-process-3.667.0.tgz#fa721b1b5b0024156c3852a9fc92c0ed9935959f" + integrity sha512-HZHnvop32fKgsNHkdhVaul7UzQ25sEc0j9yqA4bjhtbk0ECl42kj3f1pJ+ZU/YD9ut8lMJs/vVqiOdNThVdeBw== + dependencies: + "@aws-sdk/core" "3.667.0" + "@aws-sdk/types" "3.667.0" + "@smithy/property-provider" "^3.1.7" + "@smithy/shared-ini-file-loader" "^3.1.8" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/credential-provider-sso@3.675.0": + version "3.675.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/credential-provider-sso/-/credential-provider-sso-3.675.0.tgz#d9bf80e25cd7756e959747804484340071ac3e83" + integrity sha512-p/EE2c0ebSgRhg1Fe1OH2+xNl7j1P4DTc7kZy1mX1NJ72fkqnGgBuf1vk5J9RmiRpbauPNMlm+xohjkGS7iodA== + dependencies: + "@aws-sdk/client-sso" "3.675.0" + "@aws-sdk/core" "3.667.0" + "@aws-sdk/token-providers" "3.667.0" + "@aws-sdk/types" "3.667.0" + "@smithy/property-provider" "^3.1.7" + "@smithy/shared-ini-file-loader" "^3.1.8" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/credential-provider-web-identity@3.667.0": + version "3.667.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/credential-provider-web-identity/-/credential-provider-web-identity-3.667.0.tgz#439e3aa2fc9a081de53186f6d8aa78a8a6913769" + integrity sha512-t8CFlZMD/1p/8Cli3rvRiTJpjr/8BO64gw166AHgFZYSN2h95L2l1tcW0jpsc3PprA32nLg1iQVKYt4WGM4ugw== + dependencies: + "@aws-sdk/core" "3.667.0" + "@aws-sdk/types" "3.667.0" + "@smithy/property-provider" "^3.1.7" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + 
+"@aws-sdk/middleware-bucket-endpoint@3.667.0": + version "3.667.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/middleware-bucket-endpoint/-/middleware-bucket-endpoint-3.667.0.tgz#bd0a0a24f71d3709debf6e48f4e503547398e7eb" + integrity sha512-XGz4jMAkDoTyFdtLz7ZF+C05IAhCTC1PllpvTBaj821z/L0ilhbqVhrT/f2Buw8Id/K5A390csGXgusXyrFFjA== + dependencies: + "@aws-sdk/types" "3.667.0" + "@aws-sdk/util-arn-parser" "3.568.0" + "@smithy/node-config-provider" "^3.1.8" + "@smithy/protocol-http" "^4.1.4" + "@smithy/types" "^3.5.0" + "@smithy/util-config-provider" "^3.0.0" + tslib "^2.6.2" + +"@aws-sdk/middleware-expect-continue@3.667.0": + version "3.667.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/middleware-expect-continue/-/middleware-expect-continue-3.667.0.tgz#d1b9e4871c8bde3402bdd0f73e740f5f5bf190d7" + integrity sha512-0TiSL9S5DSG95NHGIz6qTMuV7GDKVn8tvvGSrSSZu/wXO3JaYSH0AElVpYfc4PtPRqVpEyNA7nnc7W56mMCLWQ== + dependencies: + "@aws-sdk/types" "3.667.0" + "@smithy/protocol-http" "^4.1.4" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/middleware-flexible-checksums@3.669.0": + version "3.669.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/middleware-flexible-checksums/-/middleware-flexible-checksums-3.669.0.tgz#00566b4981a6b35d007815d05a5d0d3ee4f9e265" + integrity sha512-01UQLoUzVwWMf+b+AEuwJ2lluBD+Cp8AcbyEHqvEaPdjGKHIS4BCvnY70mZYnAfRtL8R2h9tt7iI61oWU3Gjkg== + dependencies: + "@aws-crypto/crc32" "5.2.0" + "@aws-crypto/crc32c" "5.2.0" + "@aws-sdk/core" "3.667.0" + "@aws-sdk/types" "3.667.0" + "@smithy/is-array-buffer" "^3.0.0" + "@smithy/node-config-provider" "^3.1.8" + "@smithy/protocol-http" "^4.1.4" + "@smithy/types" "^3.5.0" + "@smithy/util-middleware" "^3.0.7" + "@smithy/util-utf8" "^3.0.0" + tslib "^2.6.2" + +"@aws-sdk/middleware-host-header@3.667.0": + version "3.667.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/middleware-host-header/-/middleware-host-header-3.667.0.tgz#d255aa6e73aec9a2d1a241de737679b6d2723c3f" + integrity sha512-Z7fIAMQnPegs7JjAQvlOeWXwpMRfegh5eCoIP6VLJIeR6DLfYKbP35JBtt98R6DXslrN2RsbTogjbxPEDQfw1w== + dependencies: + "@aws-sdk/types" "3.667.0" + "@smithy/protocol-http" "^4.1.4" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/middleware-location-constraint@3.667.0": + version "3.667.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/middleware-location-constraint/-/middleware-location-constraint-3.667.0.tgz#e5da0580656a1a385fd5783bb93ea320b4baeb1b" + integrity sha512-ob85H3HhT3/u5O+x0o557xGZ78vSNeSSwMaSitxdsfs2hOuoUl1uk+OeLpi1hkuJnL41FPpokV7TVII2XrFfmg== + dependencies: + "@aws-sdk/types" "3.667.0" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/middleware-logger@3.667.0": + version "3.667.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/middleware-logger/-/middleware-logger-3.667.0.tgz#bf072a1aa5b03239e20d75f9b525d8a990caf29f" + integrity sha512-PtTRNpNm/5c746jRgZCNg4X9xEJIwggkGJrF0GP9AB1ANg4pc/sF2Fvn1NtqPe9wtQ2stunJprnm5WkCHN7QiA== + dependencies: + "@aws-sdk/types" "3.667.0" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/middleware-recursion-detection@3.667.0": + version "3.667.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/middleware-recursion-detection/-/middleware-recursion-detection-3.667.0.tgz#e3f158d5b5ea1b1d73ab280c0cbe5ef077ed3fdc" + integrity sha512-U5glWD3ehFohzpUpopLtmqAlDurGWo2wRGPNgi4SwhWU7UDt6LS7E/UvJjqC0CUrjlzOw+my2A+Ncf+fisMhxQ== + dependencies: + "@aws-sdk/types" "3.667.0" + "@smithy/protocol-http" "^4.1.4" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + 
+"@aws-sdk/middleware-sdk-s3@3.674.0": + version "3.674.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/middleware-sdk-s3/-/middleware-sdk-s3-3.674.0.tgz#ed80913d38ada26ce7ad184cbb77892f5b29ef99" + integrity sha512-IvXnWrKy4mO+I44kLYHd6Wlw+FdB4sg1jvHCmnZo1KNaAFIA3x1iXgOaZynKoBdEmol3xfr2uDbeXUQvIwoIgg== + dependencies: + "@aws-sdk/core" "3.667.0" + "@aws-sdk/types" "3.667.0" + "@aws-sdk/util-arn-parser" "3.568.0" + "@smithy/core" "^2.4.8" + "@smithy/node-config-provider" "^3.1.8" + "@smithy/protocol-http" "^4.1.4" + "@smithy/signature-v4" "^4.2.0" + "@smithy/smithy-client" "^3.4.0" + "@smithy/types" "^3.5.0" + "@smithy/util-config-provider" "^3.0.0" + "@smithy/util-middleware" "^3.0.7" + "@smithy/util-stream" "^3.1.9" + "@smithy/util-utf8" "^3.0.0" + tslib "^2.6.2" + +"@aws-sdk/middleware-ssec@3.667.0": + version "3.667.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/middleware-ssec/-/middleware-ssec-3.667.0.tgz#19d510e4882c170eff33a5ced558781eee0ee716" + integrity sha512-1wuAUZIkmZIvOmGg5qNQU821CGFHhkuKioxXgNh0DpUxZ9+AeiV7yorJr+bqkb2KBFv1i1TnzGRecvKf/KvZIQ== + dependencies: + "@aws-sdk/types" "3.667.0" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/middleware-user-agent@3.669.0": + version "3.669.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/middleware-user-agent/-/middleware-user-agent-3.669.0.tgz#a313a4f1fcc9cc77eef3e04573ce0edade931a26" + integrity sha512-K8ScPi45zjJrj5Y2gRqVsvKKQCQbvQBfYGcBw9ZOx9TTavH80bOCBjWg/GFnvs4f37tqVc1wMN2oGvcTF6HveQ== + dependencies: + "@aws-sdk/core" "3.667.0" + "@aws-sdk/types" "3.667.0" + "@aws-sdk/util-endpoints" "3.667.0" + "@smithy/core" "^2.4.8" + "@smithy/protocol-http" "^4.1.4" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/region-config-resolver@3.667.0": + version "3.667.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/region-config-resolver/-/region-config-resolver-3.667.0.tgz#1804103246e6b6c7586edc57d26801647d2972d8" + integrity sha512-iNr+JhhA902JMKHG9IwT9YdaEx6KGl6vjAL5BRNeOjfj4cZYMog6Lz/IlfOAltMtT0w88DAHDEFrBd2uO0l2eg== + dependencies: + "@aws-sdk/types" "3.667.0" + "@smithy/node-config-provider" "^3.1.8" + "@smithy/types" "^3.5.0" + "@smithy/util-config-provider" "^3.0.0" + "@smithy/util-middleware" "^3.0.7" + tslib "^2.6.2" + +"@aws-sdk/s3-request-presigner@^3.675.0": + version "3.675.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/s3-request-presigner/-/s3-request-presigner-3.675.0.tgz#0019c3a6b1405e5bca9c933e949bfa80ee7c19d5" + integrity sha512-/2KWrFjB2FWTKV8nKK1gbufY1IX9GZy4yXVVKjdLxMpM0O6JIg79S0KGvkEZtCZW4SKen0sExsCU5Dsc1RMfwA== + dependencies: + "@aws-sdk/signature-v4-multi-region" "3.674.0" + "@aws-sdk/types" "3.667.0" + "@aws-sdk/util-format-url" "3.667.0" + "@smithy/middleware-endpoint" "^3.1.4" + "@smithy/protocol-http" "^4.1.4" + "@smithy/smithy-client" "^3.4.0" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/signature-v4-multi-region@3.674.0": + version "3.674.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/signature-v4-multi-region/-/signature-v4-multi-region-3.674.0.tgz#03e37865cd09bed5b047d2b80457ed26e41101bb" + integrity sha512-VMQWbtcbg4FV/fILrODADV21pPg9AghuEzQlW2kH0hCtacvBwFl7eBxIiCBLLtkNple+CVPJvyBcqOZdBkEv/w== + dependencies: + "@aws-sdk/middleware-sdk-s3" "3.674.0" + "@aws-sdk/types" "3.667.0" + "@smithy/protocol-http" "^4.1.4" + "@smithy/signature-v4" "^4.2.0" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/token-providers@3.667.0": + version "3.667.0" + resolved 
"https://registry.yarnpkg.com/@aws-sdk/token-providers/-/token-providers-3.667.0.tgz#ea990ef364d6bd75f0ebcf19a22f9ccd0edb3c41" + integrity sha512-ZecJlG8p6D4UTYlBHwOWX6nknVtw/OBJ3yPXTSajBjhUlj9lE2xvejI8gl4rqkyLXk7z3bki+KR4tATbMaM9yg== + dependencies: + "@aws-sdk/types" "3.667.0" + "@smithy/property-provider" "^3.1.7" + "@smithy/shared-ini-file-loader" "^3.1.8" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/types@3.667.0": + version "3.667.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/types/-/types-3.667.0.tgz#1b307c5af5a029ea1893f799fcfa122988f9d025" + integrity sha512-gYq0xCsqFfQaSL/yT1Gl1vIUjtsg7d7RhnUfsXaHt8xTxOKRTdH9GjbesBjXOzgOvB0W0vfssfreSNGFlOOMJg== + dependencies: + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/types@^3.222.0": + version "3.664.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/types/-/types-3.664.0.tgz#e6de1c0a2cdfe4f1e43271223dc0b55e613ced58" + integrity sha512-+GtXktvVgpreM2b+NJL9OqZGsOzHwlCUrO8jgQUvH/yA6Kd8QO2YFhQCp0C9sSzTteZJVqGBu8E0CQurxJHPbw== + dependencies: + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/util-arn-parser@3.568.0": + version "3.568.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/util-arn-parser/-/util-arn-parser-3.568.0.tgz#6a19a8c6bbaa520b6be1c278b2b8c17875b91527" + integrity sha512-XUKJWWo+KOB7fbnPP0+g/o5Ulku/X53t7i/h+sPHr5xxYTJJ9CYnbToo95mzxe7xWvkLrsNtJ8L+MnNn9INs2w== + dependencies: + tslib "^2.6.2" + +"@aws-sdk/util-endpoints@3.667.0": + version "3.667.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/util-endpoints/-/util-endpoints-3.667.0.tgz#c880fbc3bda5a11eec81e4ac5f95a256f8dbb24e" + integrity sha512-X22SYDAuQJWnkF1/q17pkX3nGw5XMD9YEUbmt87vUnRq7iyJ3JOpl6UKOBeUBaL838wA5yzdbinmCITJ/VZ1QA== + dependencies: + "@aws-sdk/types" "3.667.0" + "@smithy/types" "^3.5.0" + "@smithy/util-endpoints" "^2.1.3" + tslib "^2.6.2" + +"@aws-sdk/util-format-url@3.667.0": + version "3.667.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/util-format-url/-/util-format-url-3.667.0.tgz#0a10db1697d4427abd390d12052e59b295750b2f" + integrity sha512-S0D731SnEPnTfbJ/Dldw5dDrOc8uipK6NLXHDs2xIq0t61iwZLMEiN8yWCs2wAZVVJKpldUM1THLaaufU9SSSA== + dependencies: + "@aws-sdk/types" "3.667.0" + "@smithy/querystring-builder" "^3.0.7" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@aws-sdk/util-locate-window@^3.0.0": + version "3.568.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/util-locate-window/-/util-locate-window-3.568.0.tgz#2acc4b2236af0d7494f7e517401ba6b3c4af11ff" + integrity sha512-3nh4TINkXYr+H41QaPelCceEB2FXP3fxp93YZXB/kqJvX0U9j0N0Uk45gvsjmEPzG8XxkPEeLIfT2I1M7A6Lig== + dependencies: + tslib "^2.6.2" + +"@aws-sdk/util-user-agent-browser@3.675.0": + version "3.675.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/util-user-agent-browser/-/util-user-agent-browser-3.675.0.tgz#ad5371e0d4f68733e3dd04d455d99ee99609dbd9" + integrity sha512-HW4vGfRiX54RLcsYjLuAhcBBJ6lRVEZd7njfGpAwBB9s7BH8t48vrpYbyA5XbbqbTvXfYBnugQCUw9HWjEa1ww== + dependencies: + "@aws-sdk/types" "3.667.0" + "@smithy/types" "^3.5.0" + bowser "^2.11.0" + tslib "^2.6.2" + +"@aws-sdk/util-user-agent-node@3.669.0": + version "3.669.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/util-user-agent-node/-/util-user-agent-node-3.669.0.tgz#e83e17d04c65fa2bec942c239b5ad9b02c22ebc1" + integrity sha512-9jxCYrgggy2xd44ZASqI7AMiRVaSiFp+06Kg8BQSU0ijKpBJlwcsqIS8pDT/n6LxuOw2eV5ipvM2C0r1iKzrGA== + dependencies: + "@aws-sdk/middleware-user-agent" "3.669.0" + "@aws-sdk/types" "3.667.0" + "@smithy/node-config-provider" "^3.1.8" + "@smithy/types" "^3.5.0" + tslib 
"^2.6.2" + +"@aws-sdk/xml-builder@3.662.0": + version "3.662.0" + resolved "https://registry.yarnpkg.com/@aws-sdk/xml-builder/-/xml-builder-3.662.0.tgz#6cbe5aea6205fd2280ec043189985240628d1cb2" + integrity sha512-ikLkXn0igUpnJu2mCZjklvmcDGWT9OaLRv3JyC/cRkTaaSrblPjPM7KKsltxdMTLQ+v7fjCN0TsJpxphMfaOPA== + dependencies: + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + "@babel/code-frame@7.12.11": version "7.12.11" resolved "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.12.11.tgz" @@ -4246,6 +4878,496 @@ resolved "https://registry.npmjs.org/@sinonjs/text-encoding/-/text-encoding-0.7.1.tgz" integrity sha512-+iTbntw2IZPb/anVDbypzfQa+ay64MW0Zo8aJ8gZPWMMK6/OubMVb6lUPMagqjOPnmtauXnFCACVl3O7ogjeqQ== +"@smithy/abort-controller@^3.1.5": + version "3.1.5" + resolved "https://registry.yarnpkg.com/@smithy/abort-controller/-/abort-controller-3.1.5.tgz#ca7a86a3c6b20fabe59667143f58d9e198616d14" + integrity sha512-DhNPnqTqPoG8aZ5dWkFOgsuY+i0GQ3CI6hMmvCoduNsnU9gUZWZBwGfDQsTTB7NvFPkom1df7jMIJWU90kuXXg== + dependencies: + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/chunked-blob-reader-native@^3.0.0": + version "3.0.0" + resolved "https://registry.yarnpkg.com/@smithy/chunked-blob-reader-native/-/chunked-blob-reader-native-3.0.0.tgz#f1104b30030f76f9aadcbd3cdca4377bd1ba2695" + integrity sha512-VDkpCYW+peSuM4zJip5WDfqvg2Mo/e8yxOv3VF1m11y7B8KKMKVFtmZWDe36Fvk8rGuWrPZHHXZ7rR7uM5yWyg== + dependencies: + "@smithy/util-base64" "^3.0.0" + tslib "^2.6.2" + +"@smithy/chunked-blob-reader@^3.0.0": + version "3.0.0" + resolved "https://registry.yarnpkg.com/@smithy/chunked-blob-reader/-/chunked-blob-reader-3.0.0.tgz#e5d3b04e9b273ba8b7ede47461e2aa96c8aa49e0" + integrity sha512-sbnURCwjF0gSToGlsBiAmd1lRCmSn72nu9axfJu5lIx6RUEgHu6GwTMbqCdhQSi0Pumcm5vFxsi9XWXb2mTaoA== + dependencies: + tslib "^2.6.2" + +"@smithy/config-resolver@^3.0.9": + version "3.0.9" + resolved "https://registry.yarnpkg.com/@smithy/config-resolver/-/config-resolver-3.0.9.tgz#dcf4b7747ca481866f9bfac21469ebe2031a599e" + integrity sha512-5d9oBf40qC7n2xUoHmntKLdqsyTMMo/r49+eqSIjJ73eDfEtljAxEhzIQ3bkgXJtR3xiv7YzMT/3FF3ORkjWdg== + dependencies: + "@smithy/node-config-provider" "^3.1.8" + "@smithy/types" "^3.5.0" + "@smithy/util-config-provider" "^3.0.0" + "@smithy/util-middleware" "^3.0.7" + tslib "^2.6.2" + +"@smithy/core@^2.4.8": + version "2.4.8" + resolved "https://registry.yarnpkg.com/@smithy/core/-/core-2.4.8.tgz#397ac17dfa8ad658b77f96f19484f0eeaf22d397" + integrity sha512-x4qWk7p/a4dcf7Vxb2MODIf4OIcqNbK182WxRvZ/3oKPrf/6Fdic5sSElhO1UtXpWKBazWfqg0ZEK9xN1DsuHA== + dependencies: + "@smithy/middleware-endpoint" "^3.1.4" + "@smithy/middleware-retry" "^3.0.23" + "@smithy/middleware-serde" "^3.0.7" + "@smithy/protocol-http" "^4.1.4" + "@smithy/smithy-client" "^3.4.0" + "@smithy/types" "^3.5.0" + "@smithy/util-body-length-browser" "^3.0.0" + "@smithy/util-middleware" "^3.0.7" + "@smithy/util-utf8" "^3.0.0" + tslib "^2.6.2" + +"@smithy/credential-provider-imds@^3.2.4": + version "3.2.4" + resolved "https://registry.yarnpkg.com/@smithy/credential-provider-imds/-/credential-provider-imds-3.2.4.tgz#e1a2bfc8a0066f673756ad8735247cf284b9735c" + integrity sha512-S9bb0EIokfYEuar4kEbLta+ivlKCWOCFsLZuilkNy9i0uEUEHSi47IFLPaxqqCl+0ftKmcOTHayY5nQhAuq7+w== + dependencies: + "@smithy/node-config-provider" "^3.1.8" + "@smithy/property-provider" "^3.1.7" + "@smithy/types" "^3.5.0" + "@smithy/url-parser" "^3.0.7" + tslib "^2.6.2" + +"@smithy/eventstream-codec@^3.1.6": + version "3.1.6" + resolved 
"https://registry.yarnpkg.com/@smithy/eventstream-codec/-/eventstream-codec-3.1.6.tgz#70ca95aad82d5140522eb883fbc140f1f22dcb27" + integrity sha512-SBiOYPBH+5wOyPS7lfI150ePfGLhnp/eTu5RnV9xvhGvRiKfnl6HzRK9wehBph+il8FxS9KTeadx7Rcmf1GLPQ== + dependencies: + "@aws-crypto/crc32" "5.2.0" + "@smithy/types" "^3.5.0" + "@smithy/util-hex-encoding" "^3.0.0" + tslib "^2.6.2" + +"@smithy/eventstream-serde-browser@^3.0.10": + version "3.0.10" + resolved "https://registry.yarnpkg.com/@smithy/eventstream-serde-browser/-/eventstream-serde-browser-3.0.10.tgz#ffca366a4edee5097be5a710f87627a5b2da5dec" + integrity sha512-1i9aMY6Pl/SmA6NjvidxnfBLHMPzhKu2BP148pEt5VwhMdmXn36PE2kWKGa9Hj8b0XGtCTRucpCncylevCtI7g== + dependencies: + "@smithy/eventstream-serde-universal" "^3.0.9" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/eventstream-serde-config-resolver@^3.0.7": + version "3.0.7" + resolved "https://registry.yarnpkg.com/@smithy/eventstream-serde-config-resolver/-/eventstream-serde-config-resolver-3.0.7.tgz#1f352f384665f322e024a1396a7a2cca52fce9e3" + integrity sha512-eVzhGQBPEqXXYHvIUku0jMTxd4gDvenRzUQPTmKVWdRvp9JUCKrbAXGQRYiGxUYq9+cqQckRm0wq3kTWnNtDhw== + dependencies: + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/eventstream-serde-node@^3.0.9": + version "3.0.9" + resolved "https://registry.yarnpkg.com/@smithy/eventstream-serde-node/-/eventstream-serde-node-3.0.9.tgz#e985340093c2ca6587ae2fdd0663e6845fbe9463" + integrity sha512-JE0Guqvt0xsmfQ5y1EI342/qtJqznBv8cJqkHZV10PwC8GWGU5KNgFbQnsVCcX+xF+qIqwwfRmeWoJCjuOLmng== + dependencies: + "@smithy/eventstream-serde-universal" "^3.0.9" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/eventstream-serde-universal@^3.0.9": + version "3.0.9" + resolved "https://registry.yarnpkg.com/@smithy/eventstream-serde-universal/-/eventstream-serde-universal-3.0.9.tgz#1832b190a3018204e33487ba1f7f0f6e2fb0da34" + integrity sha512-bydfgSisfepCufw9kCEnWRxqxJFzX/o8ysXWv+W9F2FIyiaEwZ/D8bBKINbh4ONz3i05QJ1xE7A5OKYvgJsXaw== + dependencies: + "@smithy/eventstream-codec" "^3.1.6" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/fetch-http-handler@^3.2.9": + version "3.2.9" + resolved "https://registry.yarnpkg.com/@smithy/fetch-http-handler/-/fetch-http-handler-3.2.9.tgz#8d5199c162a37caa37a8b6848eefa9ca58221a0b" + integrity sha512-hYNVQOqhFQ6vOpenifFME546f0GfJn2OiQ3M0FDmuUu8V/Uiwy2wej7ZXxFBNqdx0R5DZAqWM1l6VRhGz8oE6A== + dependencies: + "@smithy/protocol-http" "^4.1.4" + "@smithy/querystring-builder" "^3.0.7" + "@smithy/types" "^3.5.0" + "@smithy/util-base64" "^3.0.0" + tslib "^2.6.2" + +"@smithy/hash-blob-browser@^3.1.6": + version "3.1.6" + resolved "https://registry.yarnpkg.com/@smithy/hash-blob-browser/-/hash-blob-browser-3.1.6.tgz#d61de344aa3cef0bc83e3ab8166558256262dfcd" + integrity sha512-BKNcMIaeZ9lB67sgo88iCF4YB35KT8X2dNJ8DqrtZNTgN6tUDYBKThzfGtos/mnZkGkW91AYHisESHmSiYQmKw== + dependencies: + "@smithy/chunked-blob-reader" "^3.0.0" + "@smithy/chunked-blob-reader-native" "^3.0.0" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/hash-node@^3.0.7": + version "3.0.7" + resolved "https://registry.yarnpkg.com/@smithy/hash-node/-/hash-node-3.0.7.tgz#03b5a382fb588b8c2bac11b4fe7300aaf1661c88" + integrity sha512-SAGHN+QkrwcHFjfWzs/czX94ZEjPJ0CrWJS3M43WswDXVEuP4AVy9gJ3+AF6JQHZD13bojmuf/Ap/ItDeZ+Qfw== + dependencies: + "@smithy/types" "^3.5.0" + "@smithy/util-buffer-from" "^3.0.0" + "@smithy/util-utf8" "^3.0.0" + tslib "^2.6.2" + +"@smithy/hash-stream-node@^3.1.6": + version "3.1.6" + resolved 
"https://registry.yarnpkg.com/@smithy/hash-stream-node/-/hash-stream-node-3.1.6.tgz#854ad354a865a1334baa2abc2f2247f2723de688" + integrity sha512-sFSSt7cmCpFWZPfVx7k80Bgb1K2VJ27VmMxH8X+dDhp7Wv8IBgID4K2VK5ehMJROF8hQgcj4WywnkHIwX/xlwQ== + dependencies: + "@smithy/types" "^3.5.0" + "@smithy/util-utf8" "^3.0.0" + tslib "^2.6.2" + +"@smithy/invalid-dependency@^3.0.7": + version "3.0.7" + resolved "https://registry.yarnpkg.com/@smithy/invalid-dependency/-/invalid-dependency-3.0.7.tgz#b36f258d94498f3c72ab6020091a66fc7cc16eda" + integrity sha512-Bq00GsAhHeYSuZX8Kpu4sbI9agH2BNYnqUmmbTGWOhki9NVsWn2jFr896vvoTMH8KAjNX/ErC/8t5QHuEXG+IA== + dependencies: + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/is-array-buffer@^2.2.0": + version "2.2.0" + resolved "https://registry.yarnpkg.com/@smithy/is-array-buffer/-/is-array-buffer-2.2.0.tgz#f84f0d9f9a36601a9ca9381688bd1b726fd39111" + integrity sha512-GGP3O9QFD24uGeAXYUjwSTXARoqpZykHadOmA8G5vfJPK0/DC67qa//0qvqrJzL1xc8WQWX7/yc7fwudjPHPhA== + dependencies: + tslib "^2.6.2" + +"@smithy/is-array-buffer@^3.0.0": + version "3.0.0" + resolved "https://registry.yarnpkg.com/@smithy/is-array-buffer/-/is-array-buffer-3.0.0.tgz#9a95c2d46b8768946a9eec7f935feaddcffa5e7a" + integrity sha512-+Fsu6Q6C4RSJiy81Y8eApjEB5gVtM+oFKTffg+jSuwtvomJJrhUJBu2zS8wjXSgH/g1MKEWrzyChTBe6clb5FQ== + dependencies: + tslib "^2.6.2" + +"@smithy/md5-js@^3.0.7": + version "3.0.7" + resolved "https://registry.yarnpkg.com/@smithy/md5-js/-/md5-js-3.0.7.tgz#0a645dd9c139254353fd6e6a6b65154baeab7d2e" + integrity sha512-+wco9IN9uOW4tNGkZIqTR6IXyfO7Z8A+IOq82QCRn/f/xcmt7H1fXwmQVbfDSvbeFwfNnhv7s+u0G9PzPG6o2w== + dependencies: + "@smithy/types" "^3.5.0" + "@smithy/util-utf8" "^3.0.0" + tslib "^2.6.2" + +"@smithy/middleware-content-length@^3.0.9": + version "3.0.9" + resolved "https://registry.yarnpkg.com/@smithy/middleware-content-length/-/middleware-content-length-3.0.9.tgz#fb613d1a6b8c91e828d11c0d7a0a8576dba89b8b" + integrity sha512-t97PidoGElF9hTtLCrof32wfWMqC5g2SEJNxaVH3NjlatuNGsdxXRYO/t+RPnxA15RpYiS0f+zG7FuE2DeGgjA== + dependencies: + "@smithy/protocol-http" "^4.1.4" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/middleware-endpoint@^3.1.4": + version "3.1.4" + resolved "https://registry.yarnpkg.com/@smithy/middleware-endpoint/-/middleware-endpoint-3.1.4.tgz#222c9fa49c8af6ebf8bea8ab220d92d9b8c90d3d" + integrity sha512-/ChcVHekAyzUbyPRI8CzPPLj6y8QRAfJngWcLMgsWxKVzw/RzBV69mSOzJYDD3pRwushA1+5tHtPF8fjmzBnrQ== + dependencies: + "@smithy/middleware-serde" "^3.0.7" + "@smithy/node-config-provider" "^3.1.8" + "@smithy/shared-ini-file-loader" "^3.1.8" + "@smithy/types" "^3.5.0" + "@smithy/url-parser" "^3.0.7" + "@smithy/util-middleware" "^3.0.7" + tslib "^2.6.2" + +"@smithy/middleware-retry@^3.0.23": + version "3.0.23" + resolved "https://registry.yarnpkg.com/@smithy/middleware-retry/-/middleware-retry-3.0.23.tgz#ce5574e278dd14a7995afd5a4ed2a6c9891da8ed" + integrity sha512-x9PbGXxkcXIpm6L26qRSCC+eaYcHwybRmqU8LO/WM2RRlW0g8lz6FIiKbKgGvHuoK3dLZRiQVSQJveiCzwnA5A== + dependencies: + "@smithy/node-config-provider" "^3.1.8" + "@smithy/protocol-http" "^4.1.4" + "@smithy/service-error-classification" "^3.0.7" + "@smithy/smithy-client" "^3.4.0" + "@smithy/types" "^3.5.0" + "@smithy/util-middleware" "^3.0.7" + "@smithy/util-retry" "^3.0.7" + tslib "^2.6.2" + uuid "^9.0.1" + +"@smithy/middleware-serde@^3.0.7": + version "3.0.7" + resolved "https://registry.yarnpkg.com/@smithy/middleware-serde/-/middleware-serde-3.0.7.tgz#03f0dda75edffc4cc90ea422349cbfb82368efa7" + integrity 
sha512-VytaagsQqtH2OugzVTq4qvjkLNbWehHfGcGr0JLJmlDRrNCeZoWkWsSOw1nhS/4hyUUWF/TLGGml4X/OnEep5g== + dependencies: + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/middleware-stack@^3.0.7": + version "3.0.7" + resolved "https://registry.yarnpkg.com/@smithy/middleware-stack/-/middleware-stack-3.0.7.tgz#813fa7b47895ce0d085eac89c056d21b1e46e771" + integrity sha512-EyTbMCdqS1DoeQsO4gI7z2Gzq1MoRFAeS8GkFYIwbedB7Lp5zlLHJdg+56tllIIG5Hnf9ZWX48YKSHlsKvugGA== + dependencies: + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/node-config-provider@^3.1.8": + version "3.1.8" + resolved "https://registry.yarnpkg.com/@smithy/node-config-provider/-/node-config-provider-3.1.8.tgz#2c1092040b4062eae0f7c9e121cc00ac6a77efee" + integrity sha512-E0rU0DglpeJn5ge64mk8wTGEXcQwmpUTY5Zr7IzTpDLmHKiIamINERNZYrPQjg58Ck236sEKSwRSHA4CwshU6Q== + dependencies: + "@smithy/property-provider" "^3.1.7" + "@smithy/shared-ini-file-loader" "^3.1.8" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/node-http-handler@^3.2.4": + version "3.2.4" + resolved "https://registry.yarnpkg.com/@smithy/node-http-handler/-/node-http-handler-3.2.4.tgz#3c57c40d082c3bacac1e49955bd1240e8ccc40b2" + integrity sha512-49reY3+JgLMFNm7uTAKBWiKCA6XSvkNp9FqhVmusm2jpVnHORYFeFZ704LShtqWfjZW/nhX+7Iexyb6zQfXYIQ== + dependencies: + "@smithy/abort-controller" "^3.1.5" + "@smithy/protocol-http" "^4.1.4" + "@smithy/querystring-builder" "^3.0.7" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/property-provider@^3.1.7": + version "3.1.7" + resolved "https://registry.yarnpkg.com/@smithy/property-provider/-/property-provider-3.1.7.tgz#8a304a4b9110a067a93c784e4c11e175f82da379" + integrity sha512-QfzLi1GPMisY7bAM5hOUqBdGYnY5S2JAlr201pghksrQv139f8iiiMalXtjczIP5f6owxFn3MINLNUNvUkgtPw== + dependencies: + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/protocol-http@^4.1.4": + version "4.1.4" + resolved "https://registry.yarnpkg.com/@smithy/protocol-http/-/protocol-http-4.1.4.tgz#6940d652b1825bda2422163ec9baab552669a338" + integrity sha512-MlWK8eqj0JlpZBnWmjQLqmFp71Ug00P+m72/1xQB3YByXD4zZ+y9N4hYrR0EDmrUCZIkyATWHOXFgtavwGDTzQ== + dependencies: + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/querystring-builder@^3.0.7": + version "3.0.7" + resolved "https://registry.yarnpkg.com/@smithy/querystring-builder/-/querystring-builder-3.0.7.tgz#8c443c65f4249ff1637088db1166d18411d41555" + integrity sha512-65RXGZZ20rzqqxTsChdqSpbhA6tdt5IFNgG6o7e1lnPVLCe6TNWQq4rTl4N87hTDD8mV4IxJJnvyE7brbnRkQw== + dependencies: + "@smithy/types" "^3.5.0" + "@smithy/util-uri-escape" "^3.0.0" + tslib "^2.6.2" + +"@smithy/querystring-parser@^3.0.7": + version "3.0.7" + resolved "https://registry.yarnpkg.com/@smithy/querystring-parser/-/querystring-parser-3.0.7.tgz#936206d1e6da9d862384dae730b4bad042d6a948" + integrity sha512-Fouw4KJVWqqUVIu1gZW8BH2HakwLz6dvdrAhXeXfeymOBrZw+hcqaWs+cS1AZPVp4nlbeIujYrKA921ZW2WMPA== + dependencies: + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/service-error-classification@^3.0.7": + version "3.0.7" + resolved "https://registry.yarnpkg.com/@smithy/service-error-classification/-/service-error-classification-3.0.7.tgz#5bab4ad802d30bd3fa52b8134f6c171582358226" + integrity sha512-91PRkTfiBf9hxkIchhRKJfl1rsplRDyBnmyFca3y0Z3x/q0JJN480S83LBd8R6sBCkm2bBbqw2FHp0Mbh+ecSA== + dependencies: + "@smithy/types" "^3.5.0" + +"@smithy/shared-ini-file-loader@^3.1.8": + version "3.1.8" + resolved "https://registry.yarnpkg.com/@smithy/shared-ini-file-loader/-/shared-ini-file-loader-3.1.8.tgz#7a0bf5f20cfe8e0c4a36d8dcab8194d0d2ee958e" + integrity 
sha512-0NHdQiSkeGl0ICQKcJQ2lCOKH23Nb0EaAa7RDRId6ZqwXkw4LJyIyZ0t3iusD4bnKYDPLGy2/5e2rfUhrt0Acw== + dependencies: + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/signature-v4@^4.2.0": + version "4.2.0" + resolved "https://registry.yarnpkg.com/@smithy/signature-v4/-/signature-v4-4.2.0.tgz#291f5a0e756cc251377e1e8af2a1f494e6173029" + integrity sha512-LafbclHNKnsorMgUkKm7Tk7oJ7xizsZ1VwqhGKqoCIrXh4fqDDp73fK99HOEEgcsQbtemmeY/BPv0vTVYYUNEQ== + dependencies: + "@smithy/is-array-buffer" "^3.0.0" + "@smithy/protocol-http" "^4.1.4" + "@smithy/types" "^3.5.0" + "@smithy/util-hex-encoding" "^3.0.0" + "@smithy/util-middleware" "^3.0.7" + "@smithy/util-uri-escape" "^3.0.0" + "@smithy/util-utf8" "^3.0.0" + tslib "^2.6.2" + +"@smithy/smithy-client@^3.4.0": + version "3.4.0" + resolved "https://registry.yarnpkg.com/@smithy/smithy-client/-/smithy-client-3.4.0.tgz#ceffb92108a4ad60cbede3baf44ed224dc70b333" + integrity sha512-nOfJ1nVQsxiP6srKt43r2My0Gp5PLWCW2ASqUioxIiGmu6d32v4Nekidiv5qOmmtzIrmaD+ADX5SKHUuhReeBQ== + dependencies: + "@smithy/middleware-endpoint" "^3.1.4" + "@smithy/middleware-stack" "^3.0.7" + "@smithy/protocol-http" "^4.1.4" + "@smithy/types" "^3.5.0" + "@smithy/util-stream" "^3.1.9" + tslib "^2.6.2" + +"@smithy/types@^3.5.0": + version "3.5.0" + resolved "https://registry.yarnpkg.com/@smithy/types/-/types-3.5.0.tgz#9589e154c50d9c5d00feb7d818112ef8fc285d6e" + integrity sha512-QN0twHNfe8mNJdH9unwsCK13GURU7oEAZqkBI+rsvpv1jrmserO+WnLE7jidR9W/1dxwZ0u/CB01mV2Gms/K2Q== + dependencies: + tslib "^2.6.2" + +"@smithy/url-parser@^3.0.7": + version "3.0.7" + resolved "https://registry.yarnpkg.com/@smithy/url-parser/-/url-parser-3.0.7.tgz#9d7d7e4e38514bf75ade6e8a30d2300f3db17d1b" + integrity sha512-70UbSSR8J97c1rHZOWhl+VKiZDqHWxs/iW8ZHrHp5fCCPLSBE7GcUlUvKSle3Ca+J9LLbYCj/A79BxztBvAfpA== + dependencies: + "@smithy/querystring-parser" "^3.0.7" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/util-base64@^3.0.0": + version "3.0.0" + resolved "https://registry.yarnpkg.com/@smithy/util-base64/-/util-base64-3.0.0.tgz#f7a9a82adf34e27a72d0719395713edf0e493017" + integrity sha512-Kxvoh5Qtt0CDsfajiZOCpJxgtPHXOKwmM+Zy4waD43UoEMA+qPxxa98aE/7ZhdnBFZFXMOiBR5xbcaMhLtznQQ== + dependencies: + "@smithy/util-buffer-from" "^3.0.0" + "@smithy/util-utf8" "^3.0.0" + tslib "^2.6.2" + +"@smithy/util-body-length-browser@^3.0.0": + version "3.0.0" + resolved "https://registry.yarnpkg.com/@smithy/util-body-length-browser/-/util-body-length-browser-3.0.0.tgz#86ec2f6256310b4845a2f064e2f571c1ca164ded" + integrity sha512-cbjJs2A1mLYmqmyVl80uoLTJhAcfzMOyPgjwAYusWKMdLeNtzmMz9YxNl3/jRLoxSS3wkqkf0jwNdtXWtyEBaQ== + dependencies: + tslib "^2.6.2" + +"@smithy/util-body-length-node@^3.0.0": + version "3.0.0" + resolved "https://registry.yarnpkg.com/@smithy/util-body-length-node/-/util-body-length-node-3.0.0.tgz#99a291bae40d8932166907fe981d6a1f54298a6d" + integrity sha512-Tj7pZ4bUloNUP6PzwhN7K386tmSmEET9QtQg0TgdNOnxhZvCssHji+oZTUIuzxECRfG8rdm2PMw2WCFs6eIYkA== + dependencies: + tslib "^2.6.2" + +"@smithy/util-buffer-from@^2.2.0": + version "2.2.0" + resolved "https://registry.yarnpkg.com/@smithy/util-buffer-from/-/util-buffer-from-2.2.0.tgz#6fc88585165ec73f8681d426d96de5d402021e4b" + integrity sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA== + dependencies: + "@smithy/is-array-buffer" "^2.2.0" + tslib "^2.6.2" + +"@smithy/util-buffer-from@^3.0.0": + version "3.0.0" + resolved 
"https://registry.yarnpkg.com/@smithy/util-buffer-from/-/util-buffer-from-3.0.0.tgz#559fc1c86138a89b2edaefc1e6677780c24594e3" + integrity sha512-aEOHCgq5RWFbP+UDPvPot26EJHjOC+bRgse5A8V3FSShqd5E5UN4qc7zkwsvJPPAVsf73QwYcHN1/gt/rtLwQA== + dependencies: + "@smithy/is-array-buffer" "^3.0.0" + tslib "^2.6.2" + +"@smithy/util-config-provider@^3.0.0": + version "3.0.0" + resolved "https://registry.yarnpkg.com/@smithy/util-config-provider/-/util-config-provider-3.0.0.tgz#62c6b73b22a430e84888a8f8da4b6029dd5b8efe" + integrity sha512-pbjk4s0fwq3Di/ANL+rCvJMKM5bzAQdE5S/6RL5NXgMExFAi6UgQMPOm5yPaIWPpr+EOXKXRonJ3FoxKf4mCJQ== + dependencies: + tslib "^2.6.2" + +"@smithy/util-defaults-mode-browser@^3.0.23": + version "3.0.23" + resolved "https://registry.yarnpkg.com/@smithy/util-defaults-mode-browser/-/util-defaults-mode-browser-3.0.23.tgz#6920b473126ae8857a04dd6941793bbda12adc8b" + integrity sha512-Y07qslyRtXDP/C5aWKqxTPBl4YxplEELG3xRrz2dnAQ6Lq/FgNrcKWmV561nNaZmFH+EzeGOX3ZRMbU8p1T6Nw== + dependencies: + "@smithy/property-provider" "^3.1.7" + "@smithy/smithy-client" "^3.4.0" + "@smithy/types" "^3.5.0" + bowser "^2.11.0" + tslib "^2.6.2" + +"@smithy/util-defaults-mode-node@^3.0.23": + version "3.0.23" + resolved "https://registry.yarnpkg.com/@smithy/util-defaults-mode-node/-/util-defaults-mode-node-3.0.23.tgz#d03d21816e8b2f586ccf4a87cd0b1cc55b4d75e0" + integrity sha512-9Y4WH7f0vnDGuHUa4lGX9e2p+sMwODibsceSV6rfkZOvMC+BY3StB2LdO1NHafpsyHJLpwAgChxQ38tFyd6vkg== + dependencies: + "@smithy/config-resolver" "^3.0.9" + "@smithy/credential-provider-imds" "^3.2.4" + "@smithy/node-config-provider" "^3.1.8" + "@smithy/property-provider" "^3.1.7" + "@smithy/smithy-client" "^3.4.0" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/util-endpoints@^2.1.3": + version "2.1.3" + resolved "https://registry.yarnpkg.com/@smithy/util-endpoints/-/util-endpoints-2.1.3.tgz#7498151e9dc714bdd0c6339314dd2350fa4d250a" + integrity sha512-34eACeKov6jZdHqS5hxBMJ4KyWKztTMulhuQ2UdOoP6vVxMLrOKUqIXAwJe/wiWMhXhydLW664B02CNpQBQ4Aw== + dependencies: + "@smithy/node-config-provider" "^3.1.8" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/util-hex-encoding@^3.0.0": + version "3.0.0" + resolved "https://registry.yarnpkg.com/@smithy/util-hex-encoding/-/util-hex-encoding-3.0.0.tgz#32938b33d5bf2a15796cd3f178a55b4155c535e6" + integrity sha512-eFndh1WEK5YMUYvy3lPlVmYY/fZcQE1D8oSf41Id2vCeIkKJXPcYDCZD+4+xViI6b1XSd7tE+s5AmXzz5ilabQ== + dependencies: + tslib "^2.6.2" + +"@smithy/util-middleware@^3.0.7": + version "3.0.7" + resolved "https://registry.yarnpkg.com/@smithy/util-middleware/-/util-middleware-3.0.7.tgz#770d09749b6d170a1641384a2e961487447446fa" + integrity sha512-OVA6fv/3o7TMJTpTgOi1H5OTwnuUa8hzRzhSFDtZyNxi6OZ70L/FHattSmhE212I7b6WSOJAAmbYnvcjTHOJCA== + dependencies: + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/util-retry@^3.0.7": + version "3.0.7" + resolved "https://registry.yarnpkg.com/@smithy/util-retry/-/util-retry-3.0.7.tgz#694e0667574ffe9772f620b35d3c7286aced35e9" + integrity sha512-nh1ZO1vTeo2YX1plFPSe/OXaHkLAHza5jpokNiiKX2M5YpNUv6RxGJZhpfmiR4jSvVHCjIDmILjrxKmP+/Ghug== + dependencies: + "@smithy/service-error-classification" "^3.0.7" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + +"@smithy/util-stream@^3.1.9": + version "3.1.9" + resolved "https://registry.yarnpkg.com/@smithy/util-stream/-/util-stream-3.1.9.tgz#d39656eae27696bdc5a3ec7c2f6b89c32dccd1ca" + integrity sha512-7YAR0Ub3MwTMjDfjnup4qa6W8gygZMxikBhFMPESi6ASsl/rZJhwLpF/0k9TuezScCojsM0FryGdz4LZtjKPPQ== + dependencies: + "@smithy/fetch-http-handler" "^3.2.9" + 
"@smithy/node-http-handler" "^3.2.4" + "@smithy/types" "^3.5.0" + "@smithy/util-base64" "^3.0.0" + "@smithy/util-buffer-from" "^3.0.0" + "@smithy/util-hex-encoding" "^3.0.0" + "@smithy/util-utf8" "^3.0.0" + tslib "^2.6.2" + +"@smithy/util-uri-escape@^3.0.0": + version "3.0.0" + resolved "https://registry.yarnpkg.com/@smithy/util-uri-escape/-/util-uri-escape-3.0.0.tgz#e43358a78bf45d50bb736770077f0f09195b6f54" + integrity sha512-LqR7qYLgZTD7nWLBecUi4aqolw8Mhza9ArpNEQ881MJJIU2sE5iHCK6TdyqqzcDLy0OPe10IY4T8ctVdtynubg== + dependencies: + tslib "^2.6.2" + +"@smithy/util-utf8@^2.0.0": + version "2.3.0" + resolved "https://registry.yarnpkg.com/@smithy/util-utf8/-/util-utf8-2.3.0.tgz#dd96d7640363259924a214313c3cf16e7dd329c5" + integrity sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A== + dependencies: + "@smithy/util-buffer-from" "^2.2.0" + tslib "^2.6.2" + +"@smithy/util-utf8@^3.0.0": + version "3.0.0" + resolved "https://registry.yarnpkg.com/@smithy/util-utf8/-/util-utf8-3.0.0.tgz#1a6a823d47cbec1fd6933e5fc87df975286d9d6a" + integrity sha512-rUeT12bxFnplYDe815GXbq/oixEGHfRFFtcTF3YdDi/JaENIM6aSYYLJydG83UNzLXeRI5K8abYd/8Sp/QM0kA== + dependencies: + "@smithy/util-buffer-from" "^3.0.0" + tslib "^2.6.2" + +"@smithy/util-waiter@^3.1.6": + version "3.1.6" + resolved "https://registry.yarnpkg.com/@smithy/util-waiter/-/util-waiter-3.1.6.tgz#c65870d0c802e33b96112fac5c4471b3bf2eeecb" + integrity sha512-xs/KAwWOeCklq8aMlnpk25LgxEYHKOEodfjfKclDMLcBJEVEKzDLxZxBQyztcuPJ7F54213NJS8PxoiHNMdItQ== + dependencies: + "@smithy/abort-controller" "^3.1.5" + "@smithy/types" "^3.5.0" + tslib "^2.6.2" + "@sqltools/formatter@^1.2.2": version "1.2.3" resolved "https://registry.npmjs.org/@sqltools/formatter/-/formatter-1.2.3.tgz" @@ -6820,6 +7942,11 @@ borc@^2.1.0, borc@^2.1.2: json-text-sequence "~0.1.0" readable-stream "^3.6.0" +bowser@^2.11.0: + version "2.11.0" + resolved "https://registry.yarnpkg.com/bowser/-/bowser-2.11.0.tgz#5ca3c35757a7aa5771500c70a73a9f91ef420a8f" + integrity sha512-AlcaJBi/pqqJBIQ8U9Mcpc9i8Aqxn88Skv5d+xBX006BY5u8N3mGLHa5Lgppa7L/HfwgwLgZ6NYs+Ag6uUmJRA== + brace-expansion@^1.1.7: version "1.1.11" resolved "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz" @@ -7434,6 +8561,13 @@ chokidar@3.5.1: optionalDependencies: fsevents "~2.3.1" +chokidar@4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-4.0.1.tgz#4a6dff66798fb0f72a94f616abbd7e1a19f31d41" + integrity sha512-n8enUVCED/KVRQlab1hr3MVpcVMvxtZjmEa956u+4YijlmQED223XMSYj2tLuKvr4jcCTzNNMpQDUer72MMmzA== + dependencies: + readdirp "^4.0.1" + chokidar@^3.5.1: version "3.6.0" resolved "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz" @@ -10560,6 +11694,13 @@ fast-write-atomic@~0.2.0: resolved "https://registry.npmjs.org/fast-write-atomic/-/fast-write-atomic-0.2.1.tgz" integrity sha512-WvJe06IfNYlr+6cO3uQkdKdy3Cb1LlCJSF8zRs2eT8yuhdbSlR9nIt+TgQ92RUxiRrQm+/S7RARnMfCs5iuAjw== +fast-xml-parser@4.4.1: + version "4.4.1" + resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.4.1.tgz#86dbf3f18edf8739326447bcaac31b4ae7f6514f" + integrity sha512-xkjOecfnKGkSsOwtZ5Pz7Us/T6mrbPQrq0nh+aCO5V9nk5NLWmasAHumTKjiPJPWANe+kAZ84Jc8ooJkzZ88Sw== + dependencies: + strnum "^1.0.5" + fastest-levenshtein@^1.0.7: version "1.0.12" resolved "https://registry.npmjs.org/fastest-levenshtein/-/fastest-levenshtein-1.0.12.tgz" @@ -18882,6 +20023,11 @@ readdirp@3.6.0, readdirp@~3.6.0: dependencies: picomatch "^2.2.1" +readdirp@^4.0.1: + version "4.0.2" + resolved 
"https://registry.yarnpkg.com/readdirp/-/readdirp-4.0.2.tgz#388fccb8b75665da3abffe2d8f8ed59fe74c230a" + integrity sha512-yDMz9g+VaZkqBYS/ozoBJwaBhTbZo3UNYQHNRw1D3UFQB8oHB4uS/tAODO+ZLjGWmUbKnIlOWO+aaIiAxrUWHA== + readdirp@~3.5.0: version "3.5.0" resolved "https://registry.npmjs.org/readdirp/-/readdirp-3.5.0.tgz" @@ -20586,6 +21732,11 @@ strip-json-comments@^2.0.0, strip-json-comments@~2.0.1: resolved "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz" integrity sha1-PFMZQukIwml8DsNEhYwobHygpgo= +strnum@^1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/strnum/-/strnum-1.0.5.tgz#5c4e829fe15ad4ff0d20c3db5ac97b73c9b072db" + integrity sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA== + strtok3@^6.2.4: version "6.3.0" resolved "https://registry.npmjs.org/strtok3/-/strtok3-6.3.0.tgz" @@ -21951,6 +23102,11 @@ uuid@^7.0.3: resolved "https://registry.npmjs.org/uuid/-/uuid-7.0.3.tgz" integrity sha512-DPSke0pXhTZgoF/d+WSt2QaKMCFSfx7QegxEWT+JOuHF5aWrKEn0G+ztjuJg/gG8/ItK+rbPCD/yNv8yyih6Cg== +uuid@^9.0.1: + version "9.0.1" + resolved "https://registry.yarnpkg.com/uuid/-/uuid-9.0.1.tgz#e188d4c8853cc722220392c424cd637f32293f30" + integrity sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA== + v8-compile-cache-lib@^3.0.1: version "3.0.1" resolved "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz" From e3a3074c3195f62183403b401abb41cbfbf701ff Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Fri, 25 Oct 2024 15:49:18 +0200 Subject: [PATCH 03/19] Colossus: Add proper-lockfile to deps --- storage-node/package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/storage-node/package.json b/storage-node/package.json index 9cd12d5801..6a2fc88a7c 100644 --- a/storage-node/package.json +++ b/storage-node/package.json @@ -55,6 +55,7 @@ "node-cache": "^5.1.2", "openapi-editor": "^0.3.0", "promise-timeout": "^1.3.0", + "proper-lockfile": "^4.1.2", "react": "^18.2.0", "read-chunk": "^3.2.0", "rimraf": "^3.0.2", From 0d9ba5b2d0554d7d92f85615b86fa001653628fa Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Fri, 25 Oct 2024 16:35:16 +0200 Subject: [PATCH 04/19] Colossus: Add @types/proper-lockfile dep --- storage-node/package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/storage-node/package.json b/storage-node/package.json index 6a2fc88a7c..a7e782ad41 100644 --- a/storage-node/package.json +++ b/storage-node/package.json @@ -85,6 +85,7 @@ "@types/mocha": "^5", "@types/node": "^18.6.0", "@types/pg": "^8.6.1", + "@types/proper-lockfile": "^4.1.4", "@types/swagger-ui-express": "^4.1.2", "@types/ws": "^5.1.2", "@typescript-eslint/eslint-plugin": "3.8.0", From b8f5e8d7668e79a4ec9bbb9e67bb5a76f1a6ca8d Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Fri, 25 Oct 2024 16:35:16 +0200 Subject: [PATCH 05/19] Colossus: Add @types/proper-lockfile dep --- storage-node/package.json | 1 + yarn.lock | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/storage-node/package.json b/storage-node/package.json index 6a2fc88a7c..a7e782ad41 100644 --- a/storage-node/package.json +++ b/storage-node/package.json @@ -85,6 +85,7 @@ "@types/mocha": "^5", "@types/node": "^18.6.0", "@types/pg": "^8.6.1", + "@types/proper-lockfile": "^4.1.4", "@types/swagger-ui-express": "^4.1.2", "@types/ws": "^5.1.2", "@typescript-eslint/eslint-plugin": "3.8.0", diff --git a/yarn.lock b/yarn.lock index a48b2f4120..a7d5d565ac 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6264,6 +6264,13 @@ 
dependencies: "@types/retry" "*" +"@types/proper-lockfile@^4.1.4": + version "4.1.4" + resolved "https://registry.yarnpkg.com/@types/proper-lockfile/-/proper-lockfile-4.1.4.tgz#cd9fab92bdb04730c1ada542c356f03620f84008" + integrity sha512-uo2ABllncSqg9F1D4nugVl9v93RmjxF6LJzQLMLDdPaXCUIDPeOJ21Gbqi43xNKzBi/WQ0Q0dICqufzQbMjipQ== + dependencies: + "@types/retry" "*" + "@types/qs@*", "@types/qs@^6.2.31": version "6.9.7" resolved "https://registry.npmjs.org/@types/qs/-/qs-6.9.7.tgz" From df625e71b29f396d0ad1587e89c55c33781a5cbb Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Fri, 25 Oct 2024 18:17:29 +0200 Subject: [PATCH 06/19] storageCleanup test: Give nodes more time to sync --- tests/network-tests/src/flows/storage/storageCleanup.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/network-tests/src/flows/storage/storageCleanup.ts b/tests/network-tests/src/flows/storage/storageCleanup.ts index 0f24b4ae44..75b3ddaf20 100644 --- a/tests/network-tests/src/flows/storage/storageCleanup.ts +++ b/tests/network-tests/src/flows/storage/storageCleanup.ts @@ -67,8 +67,8 @@ export async function storageCleanup({ api, query }: FlowProps): Promise { ) // Give colossus nodes some time to sync - debug('Giving colossus nodes 60 seconds to sync...') - await Utils.wait(60_000) + debug('Giving colossus nodes 120 seconds to sync...') + await Utils.wait(120_000) // Verify that both storage nodes store all the assets of created channels const colossus1Endpoint = doubleBucketConfig.buckets[0].metadata.endpoint From 6e742349f2b270d8b4df7b105bd01c5413d5bf7d Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Fri, 25 Oct 2024 21:38:53 +0200 Subject: [PATCH 07/19] Colossus: Add util:search-archives command --- .../src/commands/util/search-archives.ts | 83 +++++++++++++++++++ storage-node/src/services/archive/tracking.ts | 34 +++++++- 2 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 storage-node/src/commands/util/search-archives.ts diff --git a/storage-node/src/commands/util/search-archives.ts b/storage-node/src/commands/util/search-archives.ts new file mode 100644 index 0000000000..c332cd5528 --- /dev/null +++ b/storage-node/src/commands/util/search-archives.ts @@ -0,0 +1,83 @@ +import { Command, flags } from '@oclif/command' +import { customFlags } from '../../command-base/CustomFlags' +import { ArchivesTrackingService } from '../../services/archive/tracking' +import path from 'path' + +/** + * CLI command: + * Searches for the archive file names given an archive trackfile and a list of data objects of interest. + * + * @remarks + * Shell command: "util:search-archives" + */ +export default class SearchArchives extends Command { + static description = + 'Searches for the archive file names given an archive trackfile and a list of data objects of interest.' 
+ + static flags = { + archiveTrackfile: flags.string({ + char: 'f', + description: 'Path to the archive trackfile (jsonl)', + required: true, + }), + dataObjects: customFlags.integerArr({ + char: 'o', + description: 'List of the data object ids to look for (comma-separated)', + required: true, + }), + json: flags.boolean({ + char: 'j', + description: 'Output as JSON', + required: false, + }), + nameOnly: flags.boolean({ + char: 'n', + description: 'Output only the archive names', + required: false, + }), + } + + async run(): Promise { + const { flags } = this.parse(SearchArchives) + + const archiveTrackingService = new ArchivesTrackingService( + path.dirname(flags.archiveTrackfile), + path.basename(flags.archiveTrackfile) + ) + + const results = await archiveTrackingService.findDataObjects(flags.dataObjects.map((id) => id.toString())) + + if (flags.json) { + if (flags.nameOnly) { + this.log( + JSON.stringify( + results.hits.map((hit) => hit.name), + null, + 2 + ) + ) + } else { + this.log(JSON.stringify(results, null, 2)) + } + } else if (flags.nameOnly) { + this.log(results.hits.map((hit) => hit.name).join('\n')) + } else { + this.log('') + const objectsFound = flags.dataObjects.length - results.missingObjects.length + if (objectsFound > 0) { + this.log( + `Found ${objectsFound} out of ${flags.dataObjects.length} objects in ${results.hits.length} archive(s):` + ) + for (const hit of results.hits) { + this.log( + `\n ${hit.name}\n ${hit.foundObjects.length} objects\n ${hit.foundObjects.join(', ')}\n` + ) + } + } + if (results.missingObjects.length > 0) { + this.warn(`${results.missingObjects.length} objects could not be found: ${results.missingObjects.join(', ')}`) + } + this.log('') + } + } +} diff --git a/storage-node/src/services/archive/tracking.ts b/storage-node/src/services/archive/tracking.ts index 436e295641..9667da717f 100644 --- a/storage-node/src/services/archive/tracking.ts +++ b/storage-node/src/services/archive/tracking.ts @@ -2,6 +2,7 @@ import path from 'path' import { createReadStream, promises as fsp } from 'fs' import lockfile from 'proper-lockfile' import readline from 'node:readline/promises' +import _ from 'lodash' export const OBJECTS_TRACKING_FILENAME = 'objects_trackfile' export const ARCHIVES_TRACKING_FILENAME = 'archives_trackfile.jsonl' @@ -41,13 +42,22 @@ abstract class TrackfileService { type TrackedArchive = { name: string; dataObjectIds: string[] } +type ArchiveSearchResultHit = { + name: string + foundObjects: string[] +} +type ArchiveSearchResults = { + hits: ArchiveSearchResultHit[] + missingObjects: string[] +} + export class ArchivesTrackingService extends TrackfileService { protected trackfilePath: string protected trackedArchiveNames: Set | undefined - constructor(private directory: string) { + constructor(private directory: string, trackFileName = ARCHIVES_TRACKING_FILENAME) { super() - this.trackfilePath = path.join(this.directory, ARCHIVES_TRACKING_FILENAME) + this.trackfilePath = path.join(this.directory, trackFileName) } public getTrackfilePath(): string { @@ -87,6 +97,26 @@ export class ArchivesTrackingService extends TrackfileService { this.trackedArchiveNames = trackedArchiveNames }) } + + public async findDataObjects(dataObjectIds: string[]): Promise { + const results: ArchiveSearchResults = { + hits: [], + missingObjects: [...dataObjectIds], + } + await this.withLock(async () => { + const rl = readline.createInterface({ input: createReadStream(this.trackfilePath) }) + for await (const line of rl) { + const trackedArchive: TrackedArchive = 
JSON.parse(line.trim()) + const foundObjects = _.intersection(trackedArchive.dataObjectIds, dataObjectIds) + results.missingObjects = _.difference(results.missingObjects, foundObjects) + if (foundObjects.length > 0) { + results.hits.push({ name: trackedArchive.name, foundObjects }) + } + } + rl.close() + }) + return results + } } export class ObjectTrackingService extends TrackfileService { From abcf781b3552169b9900943b699281e33e61ca78 Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Mon, 28 Oct 2024 14:30:57 +0100 Subject: [PATCH 08/19] Colossus archive script: Add storage classes --- storage-node/src/commands/archive.ts | 13 ++++++-- .../src/services/archive/ArchiveService.ts | 11 +++++-- storage-node/src/services/archive/tasks.ts | 3 +- .../src/services/s3/AwsConnectionHandler.ts | 32 +++++++++++++++---- .../src/services/s3/IConnectionHandler.ts | 12 +++++-- 5 files changed, 55 insertions(+), 16 deletions(-) diff --git a/storage-node/src/commands/archive.ts b/storage-node/src/commands/archive.ts index b5286284de..3f41cc367b 100644 --- a/storage-node/src/commands/archive.ts +++ b/storage-node/src/commands/archive.ts @@ -15,8 +15,7 @@ import { IConnectionHandler } from '../services/s3/IConnectionHandler' import { AwsConnectionHandler } from '../services/s3/AwsConnectionHandler' import { createDirectory } from '../services/helpers/filesystem' import { promises as fsp } from 'fs' - -// TODO: Add command for retrieving archive links by object ids. +import { StorageClass } from '@aws-sdk/client-s3' /** * CLI command: @@ -191,6 +190,13 @@ Supported values: warn, error, debug, info. Default:debug`, env: 'AWS_BUCKET_NAME', required: true, }), + awsStorageClass: flags.enum({ + description: 'AWS S3 storage class to use when uploading the archives to S3.', + env: 'AWS_STORAGE_CLASS', + required: true, + default: 'DEEP_ARCHIVE', + options: Object.keys(StorageClass) as StorageClass[], + }), ...ApiCommandBase.flags, } @@ -303,9 +309,10 @@ Supported values: warn, error, debug, info. 
Default:debug`, } // Try to construct S3 connection handler - const s3ConnectionHandler: IConnectionHandler = new AwsConnectionHandler({ + const s3ConnectionHandler: IConnectionHandler = new AwsConnectionHandler({ bucketName: flags.awsS3BucketName, region: flags.awsS3BucketRegion, + defaultStorageClass: flags.awsStorageClass, }) // Get buckets to sync diff --git a/storage-node/src/services/archive/ArchiveService.ts b/storage-node/src/services/archive/ArchiveService.ts index af28b2c364..6d83ee79f9 100644 --- a/storage-node/src/services/archive/ArchiveService.ts +++ b/storage-node/src/services/archive/ArchiveService.ts @@ -17,6 +17,7 @@ import { getStorageObligationsFromRuntime } from '../sync/storageObligations' import { getDownloadTasks } from '../sync/synchronizer' import sleep from 'sleep-promise' import { Logger } from 'winston' +import { StorageClass } from '@aws-sdk/client-s3' type DataObjectData = { id: string @@ -120,7 +121,7 @@ type ArchiveServiceParams = { uploadQueueDir: string tmpDownloadDir: string // API's - s3ConnectionHandler: IConnectionHandler + s3ConnectionHandler: IConnectionHandler queryNodeApi: QueryNodeApi // Upload tasks config uploadWorkersNum: number @@ -149,7 +150,7 @@ export class ArchiveService { private tmpDownloadDir: string // API's and services private queryNodeApi: QueryNodeApi - private s3ConnectionHandler: IConnectionHandler + private s3ConnectionHandler: IConnectionHandler // Tracking services private objectTrackingService: ObjectTrackingService private archivesTrackingService: ArchivesTrackingService @@ -247,7 +248,11 @@ export class ArchiveService { const lastModified = (await fsp.stat(trackfilePath)).mtime if (!this.archiveTrackfileLastMtime || lastModified.getTime() > this.archiveTrackfileLastMtime.getTime()) { this.logger.info('Backing up the archive trackfile...') - await this.s3ConnectionHandler.uploadFileToRemoteBucket(path.basename(trackfilePath), trackfilePath) + // For the trackfile we're using STANDARD class, because: + // 1. It's a lightweight file, + // 2. It may be useful to be able to access it quickly, + // 3. It's often overriden, which would incur additional costs for archival storage classes. + await this.s3ConnectionHandler.uploadFileToRemoteBucket(path.basename(trackfilePath), trackfilePath, 'STANDARD') this.archiveTrackfileLastMtime = lastModified } } diff --git a/storage-node/src/services/archive/tasks.ts b/storage-node/src/services/archive/tasks.ts index 28a6a3a5b6..bbfe66cae5 100644 --- a/storage-node/src/services/archive/tasks.ts +++ b/storage-node/src/services/archive/tasks.ts @@ -7,6 +7,7 @@ import { blake2AsHex } from '@polkadot/util-crypto' import { IConnectionHandler } from '../s3/IConnectionHandler' import { SevenZipService } from './SevenZipService' import { ArchivesTrackingService } from './tracking' +import { StorageClass } from '@aws-sdk/client-s3' /** * Compresses provided files into a 7zip archive and removes them. 
@@ -89,7 +90,7 @@ export class UploadArchiveFileTask implements Task { private objectKey: string, private uploadsDirectory: string, private archivesTrackingService: ArchivesTrackingService, - private connectionHandler: IConnectionHandler + private connectionHandler: IConnectionHandler ) { this._7z = new SevenZipService() } diff --git a/storage-node/src/services/s3/AwsConnectionHandler.ts b/storage-node/src/services/s3/AwsConnectionHandler.ts index e7a0b7c225..b8265e8ac6 100644 --- a/storage-node/src/services/s3/AwsConnectionHandler.ts +++ b/storage-node/src/services/s3/AwsConnectionHandler.ts @@ -10,6 +10,7 @@ import { PutObjectCommand, PutObjectCommandInput, S3Client, + StorageClass, } from '@aws-sdk/client-s3' import { getSignedUrl } from '@aws-sdk/s3-request-presigner' import { fromEnv } from '@aws-sdk/credential-provider-env' @@ -19,11 +20,13 @@ import fs from 'fs' export type AwsConnectionHandlerParams = { region: string bucketName: string + defaultStorageClass?: StorageClass } -export class AwsConnectionHandler implements IConnectionHandler { +export class AwsConnectionHandler implements IConnectionHandler { private client: S3Client private bucket: string + private defaultStorageClass?: StorageClass // Official doc at https://docs.aws.amazon.com/AmazonS3/latest/userguide/upload-objects.html: // Upload an object in a single operation by using the AWS SDKs, REST API, or AWS CLI – With a single PUT operation, you can upload a single object up to 5 GB in size. @@ -36,11 +39,15 @@ export class AwsConnectionHandler implements IConnectionHandler { this.client = this.constructProduction(opts) } this.bucket = opts.bucketName + this.defaultStorageClass = opts.defaultStorageClass logger.info( `AWS connection handler initialized with bucket config ${ process.env.LOCALSTACK_ENABLED === 'true' ? 
'LOCALSTACK' : 'PRODUCTION' }` ) + if (this.defaultStorageClass) { + logger.info(`Using default AWS S3 storage class: ${this.defaultStorageClass}`) + } } private constructProduction(opts: AwsConnectionHandlerParams): S3Client { @@ -71,15 +78,23 @@ export class AwsConnectionHandler implements IConnectionHandler { return fileSizeInBytes > this.multiPartThresholdGB * 1_000_000_000 } - async uploadFileToRemoteBucket(key: string, filePath: string): Promise { - await this.uploadFileToAWSBucket(key, filePath) + async uploadFileToRemoteBucket( + key: string, + filePath: string, + storageClass?: StorageClass + ): Promise { + await this.uploadFileToAWSBucket(key, filePath, storageClass) return { key, filePath, } } - async uploadFileToRemoteBucketIfNotExists(key: string, filePath: string): Promise { + async uploadFileToRemoteBucketIfNotExists( + key: string, + filePath: string, + storageClass?: StorageClass + ): Promise { // check if file exists at key const fileExists = await this.checkIfFileExists(key) // if it does, return @@ -91,7 +106,7 @@ export class AwsConnectionHandler implements IConnectionHandler { } } // if it doesn't, upload the file - await this.uploadFileToAWSBucket(key, filePath) + await this.uploadFileToAWSBucket(key, filePath, storageClass) return { key, filePath, @@ -99,13 +114,18 @@ export class AwsConnectionHandler implements IConnectionHandler { } } - private async uploadFileToAWSBucket(filename: string, filePath: string): Promise { + private async uploadFileToAWSBucket( + filename: string, + filePath: string, + storageClass?: StorageClass + ): Promise { const fileStream = fs.createReadStream(filePath) const input: PutObjectCommandInput = { Bucket: this.bucket, Key: filename, Body: fileStream, + StorageClass: storageClass || this.defaultStorageClass, } // Uploading files to the bucket: multipart diff --git a/storage-node/src/services/s3/IConnectionHandler.ts b/storage-node/src/services/s3/IConnectionHandler.ts index a98a80206c..df97804167 100644 --- a/storage-node/src/services/s3/IConnectionHandler.ts +++ b/storage-node/src/services/s3/IConnectionHandler.ts @@ -14,22 +14,28 @@ export type UploadFileIfNotExistsOutput = { * Within this storage unit, objects are organized using keys. A key is a string that defines the location of an object * within the storage unit. Keys use the format "/" with "/" as a delimiter to separate directories. */ -export interface IConnectionHandler { +export interface IConnectionHandler { /** * Asynchronously uploads an object to the storage unit. It doesn't check if the object already exists. * @param key - The key of the object in the storage unit. * @param filePath - The local file path of the object to upload. + * @param storageClass - Optional. The storage class of the object. * @returns A promise that resolves when the upload is complete or rejects with an error. */ - uploadFileToRemoteBucket(key: string, filePath: string): Promise + uploadFileToRemoteBucket(key: string, filePath: string, storageClass?: StorageClassType): Promise /** * Asynchronously uploads an object to the storage unit if it does not exist. * @param key - The key of the object in the storage unit. * @param filePath - The local file path of the object to upload. + * @param storageClass - Optional. The storage class of the object. * @returns A promise that resolves when the upload is complete or rejects with an error. 
*/ - uploadFileToRemoteBucketIfNotExists(key: string, filePath: string): Promise + uploadFileToRemoteBucketIfNotExists( + key: string, + filePath: string, + storageClass?: StorageClassType + ): Promise /** * Asynchronously retrieves a presigned URL for an object in the storage unit. From 2b45e12d8c4c419974f4b4835e3def731df79304 Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Tue, 29 Oct 2024 18:12:33 +0100 Subject: [PATCH 09/19] Colossus archive script: Support for faster compression / no compression + bug fixes --- colossus.Dockerfile | 3 + storage-node/src/commands/archive.ts | 35 +++- .../src/services/archive/ArchiveService.ts | 75 +++---- .../src/services/archive/SevenZipService.ts | 55 ----- .../src/services/archive/compression.ts | 192 ++++++++++++++++++ storage-node/src/services/archive/tasks.ts | 102 +++++++--- 6 files changed, 328 insertions(+), 134 deletions(-) delete mode 100644 storage-node/src/services/archive/SevenZipService.ts create mode 100644 storage-node/src/services/archive/compression.ts diff --git a/colossus.Dockerfile b/colossus.Dockerfile index 36be93dc91..1fbc80b0a4 100644 --- a/colossus.Dockerfile +++ b/colossus.Dockerfile @@ -30,7 +30,10 @@ RUN yarn workspace storage-node build RUN yarn cache clean FROM node:18 as final + WORKDIR /joystream +# 7zip and zstd are required by the archive script +RUN apt-get update && apt-get install -y p7zip-full zstd COPY --from=builder /joystream /joystream RUN yarn --frozen-lockfile --production diff --git a/storage-node/src/commands/archive.ts b/storage-node/src/commands/archive.ts index 3f41cc367b..4fe94c2c76 100644 --- a/storage-node/src/commands/archive.ts +++ b/storage-node/src/commands/archive.ts @@ -15,6 +15,7 @@ import { IConnectionHandler } from '../services/s3/IConnectionHandler' import { AwsConnectionHandler } from '../services/s3/AwsConnectionHandler' import { createDirectory } from '../services/helpers/filesystem' import { promises as fsp } from 'fs' +import { CompressionAlgorithm, CompressionLevel, getCompressionService } from '../services/archive/compression' import { StorageClass } from '@aws-sdk/client-s3' /** @@ -87,11 +88,34 @@ export default class Archive extends ApiCommandBase { }), archiveTrackfileBackupFreqMinutes: flags.integer({ description: - 'Specifies how frequently the archive tracking file (containing information about .7z files content)' + + 'Specifies how frequently the archive tracking file (containing information about archive files content)' + " should be uploaded to S3 (in case it's changed).", env: 'ARCHIVE_TRACKFILE_BACKUP_FREQ_MINUTES', default: 60, }), + compressionAlgorithm: flags.enum({ + required: true, + description: 'Compression algorithm to use for archive files', + options: ['7zip', 'zstd', 'none'], + default: 'zstd', + env: 'COMPRESSION_ALGORITHM', + }), + compressionLevel: flags.enum({ + required: true, + description: 'Compression level to use for archive files (lower is faster, but provides lower storage savings)', + env: 'COMPRESSION_LEVEL', + default: 'medium', + options: ['low', 'medium', 'high'], + }), + compressionThreads: flags.integer({ + required: true, + description: + 'Number of threads to use for compression. 
' + + 'Note that {uploadWorkersNumber} upload tasks may be running at once ' + + 'and each of them can spawn a separate compression task which uses {compressionThreads} threads!', + env: 'COMPRESSION_THREADS', + default: 1, + }), uploadWorkersNumber: flags.integer({ required: false, description: 'Upload workers number (max async operations in progress).', @@ -248,7 +272,7 @@ Supported values: warn, error, debug, info. Default:debug`, this.error('No buckets to serve. Exiting...') } - if (syncableBuckets.length !== flags.buckets.length) { + if (flags.buckets.length && syncableBuckets.length !== flags.buckets.length) { logger.warn(`Only ${syncableBuckets.length} out of ${flags.buckets.length} provided buckets will be synced!`) } @@ -325,6 +349,12 @@ Supported values: warn, error, debug, info. Default:debug`, uploadQueueDir: flags.uploadQueueDir, }) + const compressionService = getCompressionService( + flags.compressionAlgorithm, + flags.compressionThreads, + flags.compressionLevel + ) + // Build and run archive service const X_HOST_ID = uuidv4() const archiveService = new ArchiveService({ @@ -339,6 +369,7 @@ Supported values: warn, error, debug, info. Default:debug`, tmpDownloadDir, s3ConnectionHandler, queryNodeApi: qnApi, + compressionService, uploadWorkersNum: flags.uploadWorkersNumber, hostId: X_HOST_ID, syncWorkersNum: flags.syncWorkersNumber, diff --git a/storage-node/src/services/archive/ArchiveService.ts b/storage-node/src/services/archive/ArchiveService.ts index 6d83ee79f9..b9a318ea1c 100644 --- a/storage-node/src/services/archive/ArchiveService.ts +++ b/storage-node/src/services/archive/ArchiveService.ts @@ -1,7 +1,7 @@ import { promises as fsp } from 'fs' import path from 'path' import logger from '../logger' -import { CompressFilesTask, UploadArchiveFileTask } from './tasks' +import { CompressAndUploadTask, UploadArchiveFileTask } from './tasks' import { WorkingStack, TaskProcessorSpawner } from '../processing/workingProcess' import { downloadEvents, DownloadFileTask } from '../sync/tasks' import _ from 'lodash' @@ -18,6 +18,7 @@ import { getDownloadTasks } from '../sync/synchronizer' import sleep from 'sleep-promise' import { Logger } from 'winston' import { StorageClass } from '@aws-sdk/client-s3' +import { CompressionService } from './compression' type DataObjectData = { id: string @@ -123,6 +124,8 @@ type ArchiveServiceParams = { // API's s3ConnectionHandler: IConnectionHandler queryNodeApi: QueryNodeApi + // Compression service + compressionService: CompressionService // Upload tasks config uploadWorkersNum: number // Sync tasks config @@ -151,6 +154,8 @@ export class ArchiveService { // API's and services private queryNodeApi: QueryNodeApi private s3ConnectionHandler: IConnectionHandler + // Compression service + private compressionService: CompressionService // Tracking services private objectTrackingService: ObjectTrackingService private archivesTrackingService: ArchivesTrackingService @@ -183,6 +188,7 @@ export class ArchiveService { this.uploadQueueDir = params.uploadQueueDir this.tmpDownloadDir = params.tmpDownloadDir this.s3ConnectionHandler = params.s3ConnectionHandler + this.compressionService = params.compressionService this.queryNodeApi = params.queryNodeApi this.uploadWorkersNum = params.uploadWorkersNum this.hostId = params.hostId @@ -400,9 +406,12 @@ export class ArchiveService { const uploadDirContents = await fsp.readdir(this.uploadQueueDir, { withFileTypes: true }) for (const item of uploadDirContents) { if (item.isFile()) { - const [name, ext1, ext2] = 
item.name.split('.') + const splitParts = item.name.split('.') + const name = splitParts[0] + const isTmp = splitParts[1] === 'tmp' + const ext = splitParts.slice(isTmp ? 2 : 1).join('.') // 1. If file name is an int and has no ext: We assume it's a fully downloaded data object - if (parseInt(name).toString() === name && !ext1) { + if (parseInt(name).toString() === name && !isTmp && !ext) { const dataObjectId = name // 1.1. If the object is not in dataObjectsQueue: remove if (!this.dataObjectsQueue.has(dataObjectId)) { @@ -419,8 +428,8 @@ export class ArchiveService { await this.tryRemovingLocalDataObject(dataObjectId) } } - // 2. If file is .7z: We assume it's a valid archive with data objects - else if (ext1 === '7z') { + // 2. If file is an archive and has no `.tmp` ext: We assume it's a valid archive with data objects + else if (!isTmp && ext === this.compressionService.getExt()) { if (!this.archivesTrackingService.isTracked(item.name)) { // 2.1. If not tracked by archiveTrackingService - try to re-upload: this.logger.warn(`Found unuploaded archive: ${item.name}. Scheduling for re-upload...`) @@ -430,7 +439,8 @@ export class ArchiveService { item.name, this.uploadQueueDir, this.archivesTrackingService, - this.s3ConnectionHandler + this.s3ConnectionHandler, + this.compressionService ), ]) // 2.2. If it's already tracked by archiveTrackingService (already uploaded): remove @@ -438,9 +448,9 @@ export class ArchiveService { this.logger.warn(`Found already uploaded archive: ${item.name}. Removing...`) await this.tryRemovingLocalFile(path.join(this.uploadQueueDir, item.name)) } - // 3. If file is .tmp.7z: remove - } else if (ext1 === 'tmp' && ext2 === '7z') { - this.logger.warn(`Found broken archive: ${item.name}. Removing...`) + // 3. If file is temporary: remove + } else if (isTmp) { + this.logger.warn(`Found temporary file: ${item.name}. Removing...`) await this.tryRemovingLocalFile(path.join(this.uploadQueueDir, item.name)) } else if (item.name !== ARCHIVES_TRACKING_FILENAME && item.name !== OBJECTS_TRACKING_FILENAME) { this.logger.warn(`Found unrecognized file: ${item.name}`) @@ -583,8 +593,7 @@ export class ArchiveService { } /** - * Compresses batches of data objects into 7zip archives and - * schedules the uploads to S3. + * Compresses batches of data objects into archives and schedules the uploads to S3. */ public async prepareAndUploadBatches(dataObjectBatches: DataObjectData[][]): Promise { if (!dataObjectBatches.length) { @@ -594,46 +603,20 @@ export class ArchiveService { this.preparingForUpload = true - this.logger.info(`Preparing ${dataObjectBatches.length} batches for upload...`) - const compressionTasks: CompressFilesTask[] = [] + this.logger.info(`Preparing ${dataObjectBatches.length} object batches for upload...`) + const uploadTasks: CompressAndUploadTask[] = [] for (const batch of dataObjectBatches) { - const compressionTask = new CompressFilesTask( + const uploadTask = new CompressAndUploadTask( this.uploadQueueDir, - batch.map((o) => o.id) + batch.map((o) => o.id), + this.archivesTrackingService, + this.s3ConnectionHandler, + this.compressionService ) - compressionTasks.push(compressionTask) + uploadTasks.push(uploadTask) } - // We run compression tasks one by one, because they spawn 7zip, which uses all available threads - // by default, ie. we probably won't benefit from running multiple 7zip tasks in parallel. 
- this.logger.info(`Creating ${compressionTasks.length} archive file(s)...`) - const archiveFiles = [] - for (const compressionTask of compressionTasks) { - this.logger.debug(compressionTask.description()) - try { - await compressionTask.execute() - archiveFiles.push(compressionTask.getArchiveFilePath()) - } catch (e) { - this.logger.error(`Data objects compression task failed: ${e.toString()}`) - } - } - - // After collecting the archive files we add them to upload queue - const uploadFileTasks = archiveFiles.map( - (filePath) => - new UploadArchiveFileTask( - filePath, - path.basename(filePath), - this.uploadQueueDir, - this.archivesTrackingService, - this.s3ConnectionHandler - ) - ) - - if (uploadFileTasks.length) { - this.logger.info(`Scheduling ${uploadFileTasks.length} uploads to S3...`) - await this.uploadWorkingStack.add(uploadFileTasks) - } + await this.uploadWorkingStack.add(uploadTasks) this.preparingForUpload = false } diff --git a/storage-node/src/services/archive/SevenZipService.ts b/storage-node/src/services/archive/SevenZipService.ts deleted file mode 100644 index 9a6c1a8d51..0000000000 --- a/storage-node/src/services/archive/SevenZipService.ts +++ /dev/null @@ -1,55 +0,0 @@ -import path from 'path' -import internal from 'stream' -import { ChildProcessByStdio, spawn, exec } from 'child_process' -import { promisify } from 'util' -import logger from '../logger' - -const execPromise = promisify(exec) - -export class SevenZipService { - public spawnCompressionProcess( - archiveFilePath: string, - compressFilePaths: string[], - onClose: (exitCode: number) => unknown - ): ChildProcessByStdio { - const p7z = spawn( - '7z', - [ - 'a', // Create an archive - '-mx=5', // Compression level (1-9) - '-ms=on', // Enable solid mode - '-y', // Answer "yes" to any prompts (like overriding existing archive file etc.) 
- '-bb0', // Output error messages only - '-bd', // Disable progress indicator - archiveFilePath, // Archive file path - ...compressFilePaths, // Files to include in the archive - ], - { - // Ignore stdin and stdout, pipe stderr - stdio: ['ignore', 'ignore', 'pipe'], - } - ) - p7z.stderr.on('data', (data) => { - logger.error(`7zip stderr: ${data}`) - }) - p7z.on('error', (error) => { - logger.error(`7zip spawn error: ${error.toString()}`) - }) - // Close will be emitted even if there was an error - p7z.on('close', onClose) - return p7z - } - - public async listFiles(archiveFilePath: string): Promise { - try { - const { stdout } = await execPromise(`7z l -ba ${archiveFilePath} | awk '{print $NF}'`) - const files = stdout - .trim() - .split('\n') - .map((o) => path.basename(o.trim())) - return files - } catch (e) { - throw new Error(`Cannot list archive files in ${archiveFilePath}: ${e.toString()}`) - } - } -} diff --git a/storage-node/src/services/archive/compression.ts b/storage-node/src/services/archive/compression.ts new file mode 100644 index 0000000000..b51b5fc720 --- /dev/null +++ b/storage-node/src/services/archive/compression.ts @@ -0,0 +1,192 @@ +import path from 'path' +import { spawn, exec } from 'child_process' +import { promisify } from 'util' +import logger from '../logger' + +const execPromise = promisify(exec) + +// Basic abstraction of algorithm-independent compression level +export type CompressionLevel = 'low' | 'medium' | 'high' + +// Available compression types +export type CompressionAlgorithm = 'none' | '7zip' | 'zstd' + +// Compression service base class +export abstract class CompressionService { + protected compressionThreads?: number + protected defaultCompressionLevel: CompressionLevel + + public constructor(compressionThreads?: number, defaultCompressionLevel?: CompressionLevel) { + this.defaultCompressionLevel = defaultCompressionLevel || 'medium' + this.compressionThreads = compressionThreads + } + + public abstract compressFiles(inputFilePaths: string[], archivePath: string, level?: CompressionLevel): Promise + + public abstract listFiles(archivePath: string): Promise + + public abstract getExt(): string +} + +// Compression service provider +export function getCompressionService( + algorithm: CompressionAlgorithm, + compressionThreads?: number, + defaultCompressionLevel?: CompressionLevel +): CompressionService { + if (algorithm === '7zip') { + return new SevenZipService(compressionThreads, defaultCompressionLevel) + } + if (algorithm === 'zstd') { + return new ZstdService(compressionThreads, defaultCompressionLevel) + } + if (algorithm === 'none') { + return new TarService(compressionThreads, defaultCompressionLevel) + } else { + throw new Error(`Unrecognized compression algorithm: ${algorithm}`) + } +} + +export class TarService extends CompressionService { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + protected getCompressProgram(level?: CompressionLevel): string { + // Use no compression by default + return '' + } + + protected getCompressProgramFlag(level?: CompressionLevel): string { + const program = this.getCompressProgram(level) + if (program) { + return `--use-compress-program="${program}"` + } + return '' + } + + public getExt(): string { + return 'tar' + } + + public async compressFiles( + compressFilePaths: string[], + archiveFilePath: string, + level?: CompressionLevel + ): Promise { + try { + const useCompressProgram = this.getCompressProgramFlag(level || this.defaultCompressionLevel) + const { stderr } = await execPromise( 
+ // -c - compress + // -f - output to file + // -P - don't strip leading '/'s from file names + `tar -Pcf ${archiveFilePath} ${useCompressProgram} ${compressFilePaths.join(' ')}` + ) + if (stderr) { + logger.warn(`tar process stderr: ${stderr}`) + } + } catch (e) { + throw new Error(`tar process failed (exit code: ${e.exit}): ${e.toString()}`) + } + } + + public async listFiles(archiveFilePath: string): Promise { + try { + const useCompressProgram = this.getCompressProgramFlag() + // -t - list contents + const { stdout } = await execPromise(`tar -tf ${archiveFilePath} ${useCompressProgram}`) + const files = stdout + .trim() + .split('\n') + .map((o) => path.basename(o.trim())) + return files + } catch (e) { + throw new Error(`Cannot list archive files in ${archiveFilePath}: ${e.toString()}`) + } + } +} + +export class ZstdService extends TarService { + private compressionLevelMap = new Map([ + ['low', 3], + ['medium', 9], + ['high', 18], + ]) + + protected getCompressProgram(level?: CompressionLevel): string { + if (level) { + // -T# - # of threads. 0 = # of cores + // -# - compression level + // -f - force (allows overriding existing archives etc.) + const threads = this.compressionThreads || 0 + return `zstd -T${threads} -${this.compressionLevelMap.get(level)} -f` + } else { + return `zstd` + } + } + + public getExt(): string { + return 'tar.zst' + } +} + +export class SevenZipService extends CompressionService { + private compressionLevelMap = new Map([ + ['low', 1], + ['medium', 5], + ['high', 9], + ]) + + public compressFiles(compressFilePaths: string[], archiveFilePath: string, level?: CompressionLevel): Promise { + return new Promise((resolve, reject) => { + const compressionLevel = this.compressionLevelMap.get(level || this.defaultCompressionLevel) + const threadFlags = this.compressionThreads ? [`-mmt${this.compressionThreads}`] : [] + const p7z = spawn( + '7z', + [ + 'a', // Create an archive + `-mx=${compressionLevel}`, // Compression level (1-9) + '-ms=on', // Enable solid mode + '-y', // Answer "yes" to any prompts (like overriding existing archive file etc.) 
+ '-bb0', // Output error messages only + '-bd', // Disable progress indicator + ...threadFlags, + archiveFilePath, // Archive file path + ...compressFilePaths, // Files to include in the archive + ], + { + // Ignore stdin and stdout, pipe stderr + stdio: ['ignore', 'ignore', 'pipe'], + } + ) + p7z.stderr.on('data', (data) => { + logger.error(`7zip stderr: ${data}`) + }) + p7z.on('error', (error) => { + logger.error(`7zip spawn error: ${error.toString()}`) + }) + // Close will be emitted even if there was an error + p7z.on('close', (exitCode) => { + if (exitCode === 0) { + resolve() + } else { + reject(Error(`7z process failed with exit code: ${exitCode || 'null'}`)) + } + }) + }) + } + + public async listFiles(archiveFilePath: string): Promise { + try { + const { stdout } = await execPromise(`7z l -ba ${archiveFilePath} | awk '{print $NF}'`) + const files = stdout + .trim() + .split('\n') + .map((o) => path.basename(o.trim())) + return files + } catch (e) { + throw new Error(`Cannot list archive files in ${archiveFilePath}: ${e.toString()}`) + } + } + + public getExt(): string { + return '7z' + } +} diff --git a/storage-node/src/services/archive/tasks.ts b/storage-node/src/services/archive/tasks.ts index bbfe66cae5..77a5e2671d 100644 --- a/storage-node/src/services/archive/tasks.ts +++ b/storage-node/src/services/archive/tasks.ts @@ -5,26 +5,30 @@ import path from 'path' import logger from '../../services/logger' import { blake2AsHex } from '@polkadot/util-crypto' import { IConnectionHandler } from '../s3/IConnectionHandler' -import { SevenZipService } from './SevenZipService' import { ArchivesTrackingService } from './tracking' import { StorageClass } from '@aws-sdk/client-s3' +import { CompressionService } from './compression' /** - * Compresses provided files into a 7zip archive and removes them. + * Compresses provided files into an archive and removes them. 
*/ export class CompressFilesTask implements Task { private dataObjectPaths: string[] private archiveFileName: string private tmpArchiveFilePath: string private archiveFilePath: string - private _7z: SevenZipService + private ext: string - constructor(private uploadsDirectory: string, private dataObjectIds: string[]) { + constructor( + private uploadsDirectory: string, + private dataObjectIds: string[], + private compressionService: CompressionService + ) { this.archiveFileName = blake2AsHex(_.sortBy(this.dataObjectIds, (id) => parseInt(id)).join(',')).substring(2) - this.tmpArchiveFilePath = path.join(this.uploadsDirectory, `${this.archiveFileName}.tmp.7z`) - this.archiveFilePath = path.join(this.uploadsDirectory, `${this.archiveFileName}.7z`) + this.ext = this.compressionService.getExt() + this.tmpArchiveFilePath = path.join(this.uploadsDirectory, `${this.archiveFileName}.tmp.${this.ext}`) + this.archiveFilePath = path.join(this.uploadsDirectory, `${this.archiveFileName}.${this.ext}`) this.dataObjectPaths = dataObjectIds.map((id) => path.join(uploadsDirectory, id)) - this._7z = new SevenZipService() } public description(): string { @@ -39,12 +43,12 @@ export class CompressFilesTask implements Task { try { await fsp.access(this.tmpArchiveFilePath, fsp.constants.W_OK | fsp.constants.R_OK) } catch (e) { - throw new Error(`7z archive access error: ${e.toString()}`) + throw new Error(`${this.tmpArchiveFilePath} access error: ${e.toString()}`) } - const packedObjects = await this._7z.listFiles(this.tmpArchiveFilePath) + const packedObjects = await this.compressionService.listFiles(this.tmpArchiveFilePath) if (_.difference(this.dataObjectIds, packedObjects).length) { - throw new Error(`7z archive is missing some files`) + throw new Error(`${this.tmpArchiveFilePath} is missing some files`) } try { @@ -64,18 +68,13 @@ export class CompressFilesTask implements Task { } public async execute(): Promise { - return new Promise((resolve, reject) => { - this._7z.spawnCompressionProcess(this.tmpArchiveFilePath, this.dataObjectPaths, (exitCode) => { - if (exitCode === 0) { - this.verifyAndMoveArchive() - .then(() => this.clenaup()) - .then(() => resolve()) - .catch((e) => reject(Error(`Compression task failed: ${e.toString()}`))) - } else { - reject(Error(`Compression task failed: 7z process failed with exit code: ${exitCode || 'null'}`)) - } - }) - }) + try { + await this.compressionService.compressFiles(this.dataObjectPaths, this.tmpArchiveFilePath) + await this.verifyAndMoveArchive() + await this.clenaup() + } catch (e) { + throw new Error(`Compression task failed: ${e.toString()}`) + } } } @@ -83,24 +82,23 @@ export class CompressFilesTask implements Task { * Uploads a specified file to S3. 
*/ export class UploadArchiveFileTask implements Task { - private _7z: SevenZipService - + // eslint-disable-next-line no-useless-constructor constructor( private archiveFilePath: string, private objectKey: string, private uploadsDirectory: string, private archivesTrackingService: ArchivesTrackingService, - private connectionHandler: IConnectionHandler - ) { - this._7z = new SevenZipService() - } + private connectionHandler: IConnectionHandler, + private compressionService: CompressionService, + private dataObjectIds?: string[] + ) {} public description(): string { return `Uploading ${this.archiveFilePath} to S3 (key: ${this.objectKey})...` } public async getPackedFiles(): Promise { - const packedFiles = await this._7z.listFiles(this.archiveFilePath) + const packedFiles = await this.compressionService.listFiles(this.archiveFilePath) return packedFiles } @@ -114,14 +112,56 @@ export class UploadArchiveFileTask implements Task { } public async execute(): Promise { - const dataObjectIds = await this.getPackedFiles() + const dataObjectIds = this.dataObjectIds || (await this.getPackedFiles()) try { await this.connectionHandler.uploadFileToRemoteBucket(this.objectKey, this.archiveFilePath) await this.archivesTrackingService.track({ name: this.objectKey, dataObjectIds: dataObjectIds }) + await this.cleanup(dataObjectIds) logger.info(`${this.archiveFilePath} successfully uploaded to S3!`) } catch (e) { logger.error(`Upload job failed for ${this.archiveFilePath}: ${e.toString()}`) } - await this.cleanup(dataObjectIds) + } +} + +/** + * Compresses data objects into an archive and uploads them to S3. + */ +export class CompressAndUploadTask implements Task { + private archiveFilePath: string + private archiveFileName: string + private compressTask: CompressFilesTask + private uploadTask: UploadArchiveFileTask + + // eslint-disable-next-line no-useless-constructor + constructor( + private uploadsDirectory: string, + private dataObjectIds: string[], + private archivesTrackingService: ArchivesTrackingService, + private connectionHandler: IConnectionHandler, + private compressionService: CompressionService + ) { + this.compressTask = new CompressFilesTask(this.uploadsDirectory, this.dataObjectIds, this.compressionService) + this.archiveFilePath = this.compressTask.getArchiveFilePath() + this.archiveFileName = path.basename(this.archiveFilePath) + this.uploadTask = new UploadArchiveFileTask( + this.archiveFilePath, + this.archiveFileName, + this.uploadsDirectory, + this.archivesTrackingService, + this.connectionHandler, + this.compressionService + ) + } + + public description(): string { + return `Compressing data objects and uploading them to S3...` + } + + public async execute(): Promise { + logger.debug(this.compressTask.description()) + await this.compressTask.execute() + logger.debug(this.uploadTask.description()) + await this.uploadTask.execute() } } From 7b7bf268c07280639ac14c4434508d5df83751a7 Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Fri, 1 Nov 2024 16:47:47 +0100 Subject: [PATCH 10/19] Archive script: Optimizations, bug fixes, stats logging --- storage-node/src/commands/archive.ts | 7 + .../src/services/archive/ArchiveService.ts | 24 +++- .../src/services/archive/compression.ts | 6 +- storage-node/src/services/archive/stats.ts | 122 ++++++++++++++++++ storage-node/src/services/archive/tasks.ts | 66 +++++++++- .../src/services/s3/AwsConnectionHandler.ts | 2 + storage-node/src/services/sync/tasks.ts | 6 +- 7 files changed, 219 insertions(+), 14 deletions(-) create mode 100644 
storage-node/src/services/archive/stats.ts diff --git a/storage-node/src/commands/archive.ts b/storage-node/src/commands/archive.ts index 4fe94c2c76..85813a7858 100644 --- a/storage-node/src/commands/archive.ts +++ b/storage-node/src/commands/archive.ts @@ -221,6 +221,12 @@ Supported values: warn, error, debug, info. Default:debug`, default: 'DEEP_ARCHIVE', options: Object.keys(StorageClass) as StorageClass[], }), + statsLoggingInterval: flags.integer({ + description: 'How often the upload/download/compression statistics summary will be logged (in minutes).', + env: 'STATS_LOGGING_INTERVAL', + default: 60, + required: true, + }), ...ApiCommandBase.flags, } @@ -375,6 +381,7 @@ Supported values: warn, error, debug, info. Default:debug`, syncWorkersNum: flags.syncWorkersNumber, syncWorkersTimeout: flags.syncWorkersTimeout, syncInterval: flags.syncInterval, + statsLoggingInterval: flags.statsLoggingInterval, }) await archiveService.init() diff --git a/storage-node/src/services/archive/ArchiveService.ts b/storage-node/src/services/archive/ArchiveService.ts index b9a318ea1c..39713b4c92 100644 --- a/storage-node/src/services/archive/ArchiveService.ts +++ b/storage-node/src/services/archive/ArchiveService.ts @@ -19,6 +19,7 @@ import sleep from 'sleep-promise' import { Logger } from 'winston' import { StorageClass } from '@aws-sdk/client-s3' import { CompressionService } from './compression' +import { StatsCollectingService } from './stats' type DataObjectData = { id: string @@ -135,6 +136,8 @@ type ArchiveServiceParams = { syncInterval: number // Archive tracking backup archiveTrackfileBackupFreqMinutes: number + // Stats logging + statsLoggingInterval: number } export class ArchiveService { @@ -176,6 +179,9 @@ export class ArchiveService { private syncQueueObjectsSize = 0 private syncWorkersTimeout: number private syncInterval: number + // Statistics + private statsLoggingInterval: number + private statsCollectingService: StatsCollectingService constructor(params: ArchiveServiceParams) { // From params: @@ -196,12 +202,14 @@ export class ArchiveService { this.syncWorkersTimeout = params.syncWorkersTimeout this.syncInterval = params.syncInterval this.archiveTrackfileBackupFreqMinutes = params.archiveTrackfileBackupFreqMinutes + this.statsLoggingInterval = params.statsLoggingInterval // Other: this.objectTrackingService = new ObjectTrackingService(this.uploadQueueDir) this.archivesTrackingService = new ArchivesTrackingService(this.uploadQueueDir) this.dataObjectsQueue = new DataObjectsQueue(this.uploadQueueDir) this.uploadWorkingStack = new WorkingStack() this.syncWorkingStack = new WorkingStack() + this.statsCollectingService = new StatsCollectingService() this.logger = logger.child({ label: 'ArchiveService' }) } @@ -228,8 +236,13 @@ export class ArchiveService { * Initializes downloadEvent handlers and archive trackfile backup interval. 
*/ private installTriggers(): void { - downloadEvents.on('success', (dataObjectId, size) => { + downloadEvents.on('success', (dataObjectId, size, startTime, endTime) => { this.logger.debug(`Download success event received for object: ${dataObjectId}`) + this.statsCollectingService.addDownloadJobStats({ + start: startTime, + end: endTime, + size: size, + }) this.handleSuccessfulDownload(dataObjectId).catch((e) => { this.logger.error(`Critical error on handleSuccessfulDownload: ${e.toString()}`) process.exit(1) @@ -244,6 +257,9 @@ export class ArchiveService { this.logger.error(`Failed to upload archive trackfile backup to S3: ${e.toString()}`) }) }, this.archiveTrackfileBackupFreqMinutes * 60_000) + setInterval(() => { + this.statsCollectingService.logSummaries() + }, this.statsLoggingInterval * 60_000) } /** @@ -440,7 +456,8 @@ export class ArchiveService { this.uploadQueueDir, this.archivesTrackingService, this.s3ConnectionHandler, - this.compressionService + this.compressionService, + this.statsCollectingService ), ]) // 2.2. If it's already tracked by archiveTrackingService (already uploaded): remove @@ -611,7 +628,8 @@ export class ArchiveService { batch.map((o) => o.id), this.archivesTrackingService, this.s3ConnectionHandler, - this.compressionService + this.compressionService, + this.statsCollectingService ) uploadTasks.push(uploadTask) } diff --git a/storage-node/src/services/archive/compression.ts b/storage-node/src/services/archive/compression.ts index b51b5fc720..5fed522285 100644 --- a/storage-node/src/services/archive/compression.ts +++ b/storage-node/src/services/archive/compression.ts @@ -73,11 +73,13 @@ export class TarService extends CompressionService { ): Promise { try { const useCompressProgram = this.getCompressProgramFlag(level || this.defaultCompressionLevel) + const baseDir = path.dirname(compressFilePaths[0]) + const relativeFilePaths = compressFilePaths.map((f) => path.relative(baseDir, f)) const { stderr } = await execPromise( // -c - compress // -f - output to file - // -P - don't strip leading '/'s from file names - `tar -Pcf ${archiveFilePath} ${useCompressProgram} ${compressFilePaths.join(' ')}` + // -C - omit the path from file names (cd into the directory) + `tar -cf ${archiveFilePath} ${useCompressProgram} -C ${baseDir} ${relativeFilePaths.join(' ')}` ) if (stderr) { logger.warn(`tar process stderr: ${stderr}`) diff --git a/storage-node/src/services/archive/stats.ts b/storage-node/src/services/archive/stats.ts new file mode 100644 index 0000000000..0f50671a6a --- /dev/null +++ b/storage-node/src/services/archive/stats.ts @@ -0,0 +1,122 @@ +import { Logger } from 'winston' +import logger from '../../services/logger' +import _ from 'lodash' + +type SizeDurationJobStats = { + size: number + start: bigint + end: bigint +} + +type DownloadJobStats = SizeDurationJobStats + +type UploadJobStats = SizeDurationJobStats + +type CompressionJobStats = SizeDurationJobStats & { + sizeAfter: number +} + +export class StatsCollectingService { + private logger: Logger + private downloadJobsStats: DownloadJobStats[] = [] + private uploadJobsStats: UploadJobStats[] = [] + private compressionJobsStats: CompressionJobStats[] = [] + + constructor() { + this.logger = logger.child({ label: 'StatsCollectingService' }) + } + + public addDownloadJobStats(stats: DownloadJobStats): void { + this.downloadJobsStats.push(stats) + } + + public addUploadJobStats(stats: UploadJobStats): void { + this.uploadJobsStats.push(stats) + } + + public addCompressionJobStats(stats: 
CompressionJobStats): void {
+    this.compressionJobsStats.push(stats)
+  }
+
+  // Convert time in milliseconds to an `HH:MM:SS.XX` string
+  private humanizeDuration(durationMs: number): string {
+    const hours = Math.floor(durationMs / 1000 / 60 / 60)
+    const minutes = Math.floor((durationMs / 1000 / 60) % 60)
+    const seconds = Math.floor((durationMs / 1000) % 60)
+    const centiseconds = Math.floor((durationMs % 1000) / 10)
+    return `${hours.toString().padStart(2, '0')}:${minutes.toString().padStart(2, '0')}:${seconds
+      .toString()
+      .padStart(2, '0')}.${centiseconds.toString().padStart(2, '0')}`
+  }
+
+  private toMs(ns: bigint) {
+    return Number(ns / BigInt(1_000_000))
+  }
+
+  private countTotalDurationMs(source: SizeDurationJobStats[]): number {
+    if (source.length === 0) {
+      // Prevent division by 0
+      return 1
+    }
+
+    // Because jobs are executed in parallel, we "merge" start/end times
+    // when they overlap.
+    const jobs = _.sortBy(source, (job) => job.start)
+    let mergedRange: [bigint, bigint] = [jobs[0].start, jobs[0].end]
+    const mergedRanges: [bigint, bigint][] = []
+    for (const job of jobs) {
+      const start = job.start
+      const end = job.end
+      if (start <= mergedRange[1]) {
+        mergedRange[1] = end > mergedRange[1] ? end : mergedRange[1]
+      } else {
+        mergedRanges.push(mergedRange)
+        mergedRange = [start, end]
+      }
+    }
+    mergedRanges.push(mergedRange)
+
+    return this.toMs(mergedRanges.reduce((a, b) => a + (b[1] - b[0]), BigInt(0)))
+  }
+
+  private sizeDurationStats(source: SizeDurationJobStats[]): string {
+    const totalSize = source.reduce((a, b) => a + b.size, 0)
+    const totalDuration = this.countTotalDurationMs(source)
+    const numFiles = source.length
+
+    const totalSizeGB = (totalSize / 1_000_000_000).toFixed(2)
+    const totalDurationH = this.humanizeDuration(totalDuration)
+    const MBps = (totalSize / 1_000_000 / (totalDuration / 1000)).toFixed(2)
+
+    return `num_files=${numFiles}, total_size=${totalSizeGB}GB, total_duration=${totalDurationH}, avg_speed=${MBps}MB/s`
+  }
+
+  public logDownloadSummary(): void {
+    this.logger.info(`Download summary: ${this.sizeDurationStats(this.downloadJobsStats)}`)
+  }
+
+  public logUploadSummary(): void {
+    this.logger.info(`Upload summary: ${this.sizeDurationStats(this.uploadJobsStats)}`)
+  }
+
+  public logCompressionSummary(): void {
+    const totalSizeBefore = this.compressionJobsStats.reduce((a, b) => a + b.size, 0)
+    const totalSizeAfter = this.compressionJobsStats.reduce((a, b) => a + b.sizeAfter, 0)
+    const totalSizeReduction = totalSizeBefore - totalSizeAfter
+
+    const totalSizeAfterGB = (totalSizeAfter / 1_000_000_000).toFixed(2)
+    const reductionPercentage = ((totalSizeReduction / totalSizeBefore) * 100).toFixed(2)
+
+    this.logger.info(
+      `Compression summary: ${this.sizeDurationStats(
+        this.compressionJobsStats
+      )}, total_archives_size=${totalSizeAfterGB}GB, avg_size_reduction=${reductionPercentage}%`
+    )
+  }
+
+  public logSummaries(): void {
+    this.logDownloadSummary()
+    this.logUploadSummary()
+    this.logCompressionSummary()
+  }
+}
diff --git a/storage-node/src/services/archive/tasks.ts b/storage-node/src/services/archive/tasks.ts
index 77a5e2671d..c431780e58 100644
--- a/storage-node/src/services/archive/tasks.ts
+++ b/storage-node/src/services/archive/tasks.ts
@@ -8,6 +8,7 @@ import { IConnectionHandler } from '../s3/IConnectionHandler'
 import { ArchivesTrackingService } from './tracking'
 import { StorageClass } from '@aws-sdk/client-s3'
 import { CompressionService } from './compression'
+import { StatsCollectingService } from './stats'
 
 /**
  * 
Compresses provided files into an archive and removes them. @@ -22,7 +23,8 @@ export class CompressFilesTask implements Task { constructor( private uploadsDirectory: string, private dataObjectIds: string[], - private compressionService: CompressionService + private compressionService: CompressionService, + private statsCollectingService: StatsCollectingService ) { this.archiveFileName = blake2AsHex(_.sortBy(this.dataObjectIds, (id) => parseInt(id)).join(',')).substring(2) this.ext = this.compressionService.getExt() @@ -39,6 +41,31 @@ export class CompressFilesTask implements Task { return this.archiveFilePath } + private async getPreCompressionSize(): Promise { + const stats = await Promise.all(this.dataObjectPaths.map((p) => fsp.stat(p))) + return stats.reduce((a, b) => a + b.size, 0) + } + + private async getPostCompressionSize(): Promise { + const { size } = await fsp.stat(this.archiveFilePath) + return size + } + + private async logCompressionStats(startTime: bigint): Promise { + try { + const preCompressionSize = await this.getPreCompressionSize() + const postCompressionSize = await this.getPostCompressionSize() + this.statsCollectingService.addCompressionJobStats({ + size: preCompressionSize, + sizeAfter: postCompressionSize, + start: startTime, + end: process.hrtime.bigint(), + }) + } catch (e) { + logger.error(`Failed to get compression stats for archive ${this.archiveFilePath}: ${e.toString()}`) + } + } + private async verifyAndMoveArchive(): Promise { try { await fsp.access(this.tmpArchiveFilePath, fsp.constants.W_OK | fsp.constants.R_OK) @@ -69,8 +96,10 @@ export class CompressFilesTask implements Task { public async execute(): Promise { try { + const startTime = process.hrtime.bigint() await this.compressionService.compressFiles(this.dataObjectPaths, this.tmpArchiveFilePath) await this.verifyAndMoveArchive() + await this.logCompressionStats(startTime) await this.clenaup() } catch (e) { throw new Error(`Compression task failed: ${e.toString()}`) @@ -90,6 +119,7 @@ export class UploadArchiveFileTask implements Task { private archivesTrackingService: ArchivesTrackingService, private connectionHandler: IConnectionHandler, private compressionService: CompressionService, + private statsCollectingService: StatsCollectingService, private dataObjectIds?: string[] ) {} @@ -97,12 +127,12 @@ export class UploadArchiveFileTask implements Task { return `Uploading ${this.archiveFilePath} to S3 (key: ${this.objectKey})...` } - public async getPackedFiles(): Promise { + private async getPackedFiles(): Promise { const packedFiles = await this.compressionService.listFiles(this.archiveFilePath) return packedFiles } - public async cleanup(dataObjectIds: string[]): Promise { + private async cleanup(dataObjectIds: string[]): Promise { const paths = [this.archiveFilePath, ...dataObjectIds.map((id) => path.join(this.uploadsDirectory, id))] try { await Promise.all(paths.map((p) => fsp.rm(p, { force: true }))) @@ -111,11 +141,26 @@ export class UploadArchiveFileTask implements Task { } } + private async logUploadStats(startTime: bigint): Promise { + try { + const { size } = await fsp.stat(this.archiveFilePath) + this.statsCollectingService.addUploadJobStats({ + size, + start: startTime, + end: process.hrtime.bigint(), + }) + } catch (e) { + logger.error(`Failed to get compression stats for archive ${this.archiveFilePath}: ${e.toString()}`) + } + } + public async execute(): Promise { const dataObjectIds = this.dataObjectIds || (await this.getPackedFiles()) try { + const startTime = process.hrtime.bigint() 
await this.connectionHandler.uploadFileToRemoteBucket(this.objectKey, this.archiveFilePath) await this.archivesTrackingService.track({ name: this.objectKey, dataObjectIds: dataObjectIds }) + await this.logUploadStats(startTime) await this.cleanup(dataObjectIds) logger.info(`${this.archiveFilePath} successfully uploaded to S3!`) } catch (e) { @@ -133,15 +178,20 @@ export class CompressAndUploadTask implements Task { private compressTask: CompressFilesTask private uploadTask: UploadArchiveFileTask - // eslint-disable-next-line no-useless-constructor constructor( private uploadsDirectory: string, private dataObjectIds: string[], private archivesTrackingService: ArchivesTrackingService, private connectionHandler: IConnectionHandler, - private compressionService: CompressionService + private compressionService: CompressionService, + private statsCollectingService: StatsCollectingService ) { - this.compressTask = new CompressFilesTask(this.uploadsDirectory, this.dataObjectIds, this.compressionService) + this.compressTask = new CompressFilesTask( + this.uploadsDirectory, + this.dataObjectIds, + this.compressionService, + this.statsCollectingService + ) this.archiveFilePath = this.compressTask.getArchiveFilePath() this.archiveFileName = path.basename(this.archiveFilePath) this.uploadTask = new UploadArchiveFileTask( @@ -150,7 +200,9 @@ export class CompressAndUploadTask implements Task { this.uploadsDirectory, this.archivesTrackingService, this.connectionHandler, - this.compressionService + this.compressionService, + this.statsCollectingService, + this.dataObjectIds ) } diff --git a/storage-node/src/services/s3/AwsConnectionHandler.ts b/storage-node/src/services/s3/AwsConnectionHandler.ts index b8265e8ac6..d4db01070e 100644 --- a/storage-node/src/services/s3/AwsConnectionHandler.ts +++ b/storage-node/src/services/s3/AwsConnectionHandler.ts @@ -119,12 +119,14 @@ export class AwsConnectionHandler implements IConnectionHandler { filePath: string, storageClass?: StorageClass ): Promise { + const size = (await fs.promises.stat(filePath)).size const fileStream = fs.createReadStream(filePath) const input: PutObjectCommandInput = { Bucket: this.bucket, Key: filename, Body: fileStream, + ContentLength: size, StorageClass: storageClass || this.defaultStorageClass, } diff --git a/storage-node/src/services/sync/tasks.ts b/storage-node/src/services/sync/tasks.ts index a359bd754b..9f864e9866 100644 --- a/storage-node/src/services/sync/tasks.ts +++ b/storage-node/src/services/sync/tasks.ts @@ -16,7 +16,7 @@ import { EventEmitter } from 'node:events' const fsPromises = fs.promises export const downloadEvents = new EventEmitter<{ - 'success': [string, number] + 'success': [string, number, bigint, bigint] 'fail': [string, number] }>() @@ -88,6 +88,7 @@ export class DownloadFileTask implements Task { return } + const startTime = process.hrtime.bigint() for (const randomUrlIndex of operatorUrlIndices) { const chosenBaseUrl = this.operatorUrls[randomUrlIndex] logger.debug(`Sync - random storage node URL was chosen ${chosenBaseUrl}`) @@ -101,7 +102,8 @@ export class DownloadFileTask implements Task { try { await moveFile(tempFilePath, filepath) await fsPromises.access(filepath, fs.constants.F_OK) - downloadEvents.emit('success', this.dataObjectId, this.expectedSize) + const endTime = process.hrtime.bigint() + downloadEvents.emit('success', this.dataObjectId, this.expectedSize, startTime, endTime) return } catch (err) { logger.error(`Sync - error trying to move file ${tempFilePath} to ${filepath}: ${err.toString()}`) 
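The changes above thread start/end timestamps taken with `process.hrtime.bigint()` through the compression, upload and download paths so that `StatsCollectingService` can derive job durations (via `addCompressionJobStats` / `addUploadJobStats` and the extended `downloadEvents` `'success'` tuple). The service itself is not shown in this series, so the sketch below is only an illustration of how a consumer of the new `(dataObjectId, size, startTime, endTime)` tuple might turn the nanosecond timestamps into duration and throughput figures; the emitter shape mirrors `sync/tasks.ts`, while the listener and its names are hypothetical.

```typescript
import { EventEmitter } from 'node:events'

// Mirrors the typed emitter declared in sync/tasks.ts:
// 'success' now carries (dataObjectId, size, startTime, endTime),
// where both timestamps come from process.hrtime.bigint() (nanoseconds).
const downloadEvents = new EventEmitter<{
  success: [string, number, bigint, bigint]
  fail: [string, number]
}>()

// Hypothetical consumer: derive duration and throughput from the emitted timestamps.
downloadEvents.on('success', (dataObjectId, size, start, end) => {
  const durationMs = Number(end - start) / 1_000_000 // ns -> ms
  const throughputMbps = durationMs > 0 ? (size * 8) / (durationMs * 1000) : 0 // bytes over ms -> Mbps
  console.log(`${dataObjectId}: ${size} B in ${durationMs.toFixed(1)} ms (~${throughputMbps.toFixed(2)} Mbps)`)
})

// Producer side, as done by DownloadFileTask after a successful move (values illustrative):
const start = process.hrtime.bigint()
// ... download and move the file ...
downloadEvents.emit('success', '1337', 1_000_000, start, process.hrtime.bigint())
```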
From 5238b65b932fa136ec8c5db4e08a68c02669f137 Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Wed, 6 Nov 2024 18:08:50 +0100 Subject: [PATCH 11/19] Archive script: Sync all objects, ignore bucket assignments --- storage-node/CHANGELOG.md | 2 +- storage-node/src/commands/archive.ts | 75 ++----------------- .../src/services/archive/ArchiveService.ts | 9 +-- storage-node/src/services/queryNode/api.ts | 12 +++ .../queryNode/queries/queries.graphql | 6 ++ .../src/services/sync/storageObligations.ts | 7 +- 6 files changed, 31 insertions(+), 80 deletions(-) diff --git a/storage-node/CHANGELOG.md b/storage-node/CHANGELOG.md index 1c07b042b9..da23df9c92 100644 --- a/storage-node/CHANGELOG.md +++ b/storage-node/CHANGELOG.md @@ -1,6 +1,6 @@ ### 4.3.0 -- Adds `archive` mode / command, which allows downloading, compressing and uploading assigned data objects to an external S3 bucket that can be used as a backup. +- Adds `archive` mode / command, which allows downloading, compressing and uploading all data objects to an external S3 bucket that can be used as a backup. ### 4.2.0 diff --git a/storage-node/src/commands/archive.ts b/storage-node/src/commands/archive.ts index 85813a7858..66c08f2710 100644 --- a/storage-node/src/commands/archive.ts +++ b/storage-node/src/commands/archive.ts @@ -1,13 +1,10 @@ import { flags } from '@oclif/command' -import { ApiPromise } from '@polkadot/api' import _ from 'lodash' import path from 'path' import { v4 as uuidv4 } from 'uuid' import ApiCommandBase from '../command-base/ApiCommandBase' -import { customFlags } from '../command-base/CustomFlags' import logger, { DatePatternByFrequency, Frequency, initNewLogger } from '../services/logger' import { QueryNodeApi } from '../services/queryNode/api' -import { constructBucketToAddressMapping } from '../services/sync/storageObligations' import { verifyWorkerId } from '../services/runtime/queries' import { ArchiveService } from '../services/archive/ArchiveService' import ExitCodes from './../command-base/ExitCodes' @@ -38,13 +35,6 @@ export default class Archive extends ApiCommandBase { description: 'Storage provider worker ID', env: 'WORKER_ID', }), - buckets: customFlags.integerArr({ - char: 'b', - description: - 'Comma separated list of bucket IDs to sync. Buckets that are not assigned to worker are ignored.\n' + - 'If not specified all buckets belonging to the worker will be synced.', - default: process.env.BUCKETS ? _.uniq(process.env.BUCKETS.split(',').map((b) => parseInt(b))) : [], - }), uploadQueueDir: flags.string({ description: 'Directory to store fully downloaded data objects before compressing them and uploading to S3 (absolute path).', @@ -230,61 +220,6 @@ Supported values: warn, error, debug, info. Default:debug`, ...ApiCommandBase.flags, } - async getSyncableBuckets(api: ApiPromise, qnApi: QueryNodeApi): Promise { - const { flags } = this.parse(Archive) - const workerId = flags.worker - - if (!(await verifyWorkerId(api, workerId))) { - logger.error(`workerId ${workerId} does not exist in the storage working group`) - this.exit(ExitCodes.InvalidWorkerId) - } - - if (!flags.buckets.length) { - logger.info(`No buckets provided. 
Will use all bucket belonging to worker ${workerId}.`) - } - - const selectedBucketsAndAccounts = await constructBucketToAddressMapping(api, qnApi, workerId, flags.buckets) - const selectedBuckets = selectedBucketsAndAccounts.map(([bucketId]) => bucketId) - const selectedVsProvidedDiff = _.difference( - flags.buckets.map((id) => id.toString()), - selectedBuckets - ) - - if (selectedVsProvidedDiff.length) { - logger.warn( - `Buckets: ${JSON.stringify( - selectedVsProvidedDiff - )} do not belong to worker with ID=${workerId} and will NOT be synced!` - ) - } - - let syncableBuckets = selectedBuckets - if (process.env.DISABLE_BUCKET_AUTH === 'true') { - logger.warn('Bucket authentication is disabled! This is not recommended for production use!') - } else { - const keystoreAddresses = this.getUnlockedAccounts() - const bucketsWithKeysInKeyring = selectedBucketsAndAccounts.filter(([bucketId, address]) => { - if (!keystoreAddresses.includes(address)) { - this.warn(`Missing transactor key for bucket ${bucketId}. It will NOT be synced!`) - return false - } - return true - }) - - syncableBuckets = bucketsWithKeysInKeyring.map(([bucketId]) => bucketId) - } - - if (!syncableBuckets.length) { - this.error('No buckets to serve. Exiting...') - } - - if (flags.buckets.length && syncableBuckets.length !== flags.buckets.length) { - logger.warn(`Only ${syncableBuckets.length} out of ${flags.buckets.length} provided buckets will be synced!`) - } - - return syncableBuckets - } - initLogger(): void { const { flags } = this.parse(Archive) if (!_.isEmpty(flags.elasticSearchEndpoint) || !_.isEmpty(flags.logFilePath)) { @@ -345,9 +280,12 @@ Supported values: warn, error, debug, info. Default:debug`, defaultStorageClass: flags.awsStorageClass, }) - // Get buckets to sync - const syncableBuckets = await this.getSyncableBuckets(api, qnApi) - logger.info(`Buckets to sync: [${syncableBuckets}]`) + // Verify workerId + const workerId = flags.worker + if (!(await verifyWorkerId(api, workerId))) { + logger.error(`workerId ${workerId} does not exist in the storage working group`) + this.exit(ExitCodes.InvalidWorkerId) + } // Check and normalize input directories const { tmpDownloadDir, uploadQueueDir } = await this.checkAndNormalizeDirs({ @@ -364,7 +302,6 @@ Supported values: warn, error, debug, info. 
Default:debug`, // Build and run archive service const X_HOST_ID = uuidv4() const archiveService = new ArchiveService({ - buckets: syncableBuckets.map((id) => id.toString()), archiveTrackfileBackupFreqMinutes: flags.archiveTrackfileBackupFreqMinutes, localCountTriggerThreshold: flags.localCountTriggerThreshold, localSizeTriggerThreshold: flags.localSizeTriggerThresholdMB * 1_000_000, diff --git a/storage-node/src/services/archive/ArchiveService.ts b/storage-node/src/services/archive/ArchiveService.ts index 39713b4c92..93c05139c9 100644 --- a/storage-node/src/services/archive/ArchiveService.ts +++ b/storage-node/src/services/archive/ArchiveService.ts @@ -110,8 +110,6 @@ class DataObjectsQueue { } type ArchiveServiceParams = { - // Supported buckets - buckets: string[] // Upload trigger Thresholds localCountTriggerThreshold: number | undefined localSizeTriggerThreshold: number @@ -142,8 +140,6 @@ type ArchiveServiceParams = { export class ArchiveService { private logger: Logger - // Buckets - private buckets: string[] // Thresholds private localCountTriggerThreshold: number | undefined private localSizeTriggerThreshold: number @@ -185,7 +181,6 @@ export class ArchiveService { constructor(params: ArchiveServiceParams) { // From params: - this.buckets = params.buckets this.localCountTriggerThreshold = params.localCountTriggerThreshold this.localSizeTriggerThreshold = params.localSizeTriggerThreshold this.localAgeTriggerThresholdMinutes = params.localAgeTriggerThresholdMinutes @@ -372,7 +367,7 @@ export class ArchiveService { * @throws Error If there's an issue w/ file access or the query node */ public async performSync(): Promise { - const model = await getStorageObligationsFromRuntime(this.queryNodeApi, this.buckets) + const model = await getStorageObligationsFromRuntime(this.queryNodeApi) const assignedObjects = model.dataObjects const added = assignedObjects.filter((obj) => !this.objectTrackingService.isTracked(obj.id)) @@ -400,7 +395,7 @@ export class ArchiveService { } const [downloadTask] = await getDownloadTasks( model, - this.buckets, + [], [object], this.uploadQueueDir, this.tmpDownloadDir, diff --git a/storage-node/src/services/queryNode/api.ts b/storage-node/src/services/queryNode/api.ts index e074905b11..8fb896dafa 100644 --- a/storage-node/src/services/queryNode/api.ts +++ b/storage-node/src/services/queryNode/api.ts @@ -6,6 +6,9 @@ import logger from '../logger' import { DataObjectByBagIdsDetailsFragment, DataObjectDetailsFragment, + GetAllStorageBagDetails, + GetAllStorageBagDetailsQuery, + GetAllStorageBagDetailsQueryVariables, GetDataObjects, GetDataObjectsByBagIds, GetDataObjectsByBagIdsQuery, @@ -236,6 +239,15 @@ export class QueryNodeApi { return result } + public async getAllStorageBagsDetails(): Promise> { + const result = await this.multipleEntitiesQuery< + GetAllStorageBagDetailsQuery, + GetAllStorageBagDetailsQueryVariables + >(GetAllStorageBagDetails, {}, 'storageBags') + + return result + } + /** * Returns data objects info by pages for the given bags. 
* diff --git a/storage-node/src/services/queryNode/queries/queries.graphql b/storage-node/src/services/queryNode/queries/queries.graphql index 4e2701730f..b29c1c51df 100644 --- a/storage-node/src/services/queryNode/queries/queries.graphql +++ b/storage-node/src/services/queryNode/queries/queries.graphql @@ -53,6 +53,12 @@ query getStorageBagDetails($bucketIds: [String!]) { } } +query getAllStorageBagDetails { + storageBags { + ...StorageBagDetails + } +} + fragment DataObjectByBagIdsDetails on StorageDataObject { id size diff --git a/storage-node/src/services/sync/storageObligations.ts b/storage-node/src/services/sync/storageObligations.ts index b8bc4e7a4f..58f6b75ac6 100644 --- a/storage-node/src/services/sync/storageObligations.ts +++ b/storage-node/src/services/sync/storageObligations.ts @@ -95,16 +95,17 @@ export type DataObject = { * runtime (Query Node). * * @param queryNodeUrl - Query Node URL - * @param workerId - worker ID + * @param bucketIds - bucket IDs. If undefined, we treat all existing bags as obligations. * @returns promise for the DataObligations */ export async function getStorageObligationsFromRuntime( qnApi: QueryNodeApi, - bucketIds: string[] + bucketIds?: string[] ): Promise { const allBuckets = await getAllBuckets(qnApi) - const assignedBags = await getAllAssignedBags(qnApi, bucketIds) + const assignedBags = + bucketIds === undefined ? await qnApi.getAllStorageBagsDetails() : await getAllAssignedBags(qnApi, bucketIds) const bagIds = assignedBags.map((bag) => bag.id) const assignedDataObjects = await getAllAssignedDataObjects(qnApi, bagIds) From 3cb0713f22963d8cc22c509bd7388297aa3d9f4b Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Fri, 15 Nov 2024 17:19:31 +0100 Subject: [PATCH 12/19] Upload timeout issues fix attempt --- .../src/services/archive/ArchiveService.ts | 4 +- storage-node/src/services/archive/tasks.ts | 73 ++++++++++++++++--- .../src/services/s3/AwsConnectionHandler.ts | 4 + 3 files changed, 68 insertions(+), 13 deletions(-) diff --git a/storage-node/src/services/archive/ArchiveService.ts b/storage-node/src/services/archive/ArchiveService.ts index 93c05139c9..9ed7dd1bd6 100644 --- a/storage-node/src/services/archive/ArchiveService.ts +++ b/storage-node/src/services/archive/ArchiveService.ts @@ -37,7 +37,7 @@ class DataObjectsQueue { constructor(dataDir: string) { this.dataDir = dataDir - this.logger = logger.child('DataObjectsQueue') + this.logger = logger.child({ label: 'DataObjectsQueue' }) } public get totalSize() { @@ -450,6 +450,7 @@ export class ArchiveService { item.name, this.uploadQueueDir, this.archivesTrackingService, + this.objectTrackingService, this.s3ConnectionHandler, this.compressionService, this.statsCollectingService @@ -622,6 +623,7 @@ export class ArchiveService { this.uploadQueueDir, batch.map((o) => o.id), this.archivesTrackingService, + this.objectTrackingService, this.s3ConnectionHandler, this.compressionService, this.statsCollectingService diff --git a/storage-node/src/services/archive/tasks.ts b/storage-node/src/services/archive/tasks.ts index c431780e58..95c694a9b9 100644 --- a/storage-node/src/services/archive/tasks.ts +++ b/storage-node/src/services/archive/tasks.ts @@ -5,10 +5,11 @@ import path from 'path' import logger from '../../services/logger' import { blake2AsHex } from '@polkadot/util-crypto' import { IConnectionHandler } from '../s3/IConnectionHandler' -import { ArchivesTrackingService } from './tracking' +import { ArchivesTrackingService, ObjectTrackingService } from './tracking' import { StorageClass } from 
'@aws-sdk/client-s3' import { CompressionService } from './compression' import { StatsCollectingService } from './stats' +import { Logger } from 'winston' /** * Compresses provided files into an archive and removes them. @@ -19,18 +20,23 @@ export class CompressFilesTask implements Task { private tmpArchiveFilePath: string private archiveFilePath: string private ext: string + private logger: Logger constructor( private uploadsDirectory: string, private dataObjectIds: string[], private compressionService: CompressionService, - private statsCollectingService: StatsCollectingService + private statsCollectingService: StatsCollectingService, + private objectTrackingService: ObjectTrackingService ) { this.archiveFileName = blake2AsHex(_.sortBy(this.dataObjectIds, (id) => parseInt(id)).join(',')).substring(2) this.ext = this.compressionService.getExt() this.tmpArchiveFilePath = path.join(this.uploadsDirectory, `${this.archiveFileName}.tmp.${this.ext}`) this.archiveFilePath = path.join(this.uploadsDirectory, `${this.archiveFileName}.${this.ext}`) this.dataObjectPaths = dataObjectIds.map((id) => path.join(uploadsDirectory, id)) + this.logger = logger.child({ + label: `CompressFilesTask (${this.archiveFileName})`, + }) } public description(): string { @@ -62,7 +68,7 @@ end: process.hrtime.bigint(), }) } catch (e) { - logger.error(`Failed to get compression stats for archive ${this.archiveFilePath}: ${e.toString()}`) + this.logger.error(`Failed to get compression stats: ${e.toString()}`) } } @@ -85,12 +91,30 @@ } } + private async handleFailure(e: Error): Promise { + const pathsToClean = [this.tmpArchiveFilePath, this.archiveFilePath, ...this.dataObjectPaths] + // Untrack data objects so that they can be re-downloaded + // and remove data objects and any archives that were created from uploadsDir + try { + await Promise.all(this.dataObjectIds.map((id) => this.objectTrackingService.untrack(id))) + await Promise.all(pathsToClean.map((p) => fsp.rm(p, { force: true }))) + } catch (e) { + this.logger.error(`Compression failed: ${e.toString()}`) + this.logger.error(`Failed to clean up local data: ${e.toString()}`) + this.logger.error(`Exiting due to critical error...`) + process.exit(1) + } + throw new Error(`Compression task failed: ${e.toString()}`) + } + private async clenaup(): Promise { // Remove packed objects from uploadsDir try { await Promise.all(this.dataObjectPaths.map((p) => fsp.rm(p))) } catch (e) { - logger.error(`Couldn't fully cleanup files after compression: ${e.toString()}`) + this.logger.error(`Cleanup failed: ${e.toString()}`) + this.logger.error(`Exiting due to critical error...`) + process.exit(1) } } @@ -102,7 +126,7 @@ await this.logCompressionStats(startTime) await this.clenaup() } catch (e) { - throw new Error(`Compression task failed: ${e.toString()}`) + await this.handleFailure(e) } } } @@ -111,17 +135,23 @@ * Uploads a specified file to S3. 
*/ export class UploadArchiveFileTask implements Task { - // eslint-disable-next-line no-useless-constructor + private logger: Logger + constructor( private archiveFilePath: string, private objectKey: string, private uploadsDirectory: string, private archivesTrackingService: ArchivesTrackingService, + private objectTrackingService: ObjectTrackingService, private connectionHandler: IConnectionHandler, private compressionService: CompressionService, private statsCollectingService: StatsCollectingService, private dataObjectIds?: string[] - ) {} + ) { + this.logger = logger.child({ + label: `UploadArchiveFileTask (${this.objectKey})`, + }) + } public description(): string { return `Uploading ${this.archiveFilePath} to S3 (key: ${this.objectKey})...` @@ -137,7 +167,9 @@ try { await Promise.all(paths.map((p) => fsp.rm(p, { force: true }))) } catch (e) { - logger.error(`Upload task cleanup failed: ${e.toString()}`) + this.logger.error(`Cleanup failed: ${e.toString()}`) + this.logger.error(`Exiting due to critical error...`) + process.exit(1) } } @@ -150,8 +182,22 @@ end: process.hrtime.bigint(), }) } catch (e) { - logger.error(`Failed to get compression stats for archive ${this.archiveFilePath}: ${e.toString()}`) + this.logger.error(`Failed to get upload stats: ${e.toString()}`) + } + } + + private async handleFailure(e: Error, dataObjectIds: string[]): Promise { + // Untrack the data objects so that they can be re-downloaded and remove the archive file + try { + await Promise.all(dataObjectIds.map((id) => this.objectTrackingService.untrack(id))) + await fsp.rm(this.archiveFilePath, { force: true }) + } catch (e) { + this.logger.error(`Upload failed: ${e.toString()}`) + this.logger.error(`Failed to clean up local data: ${e.toString()}`) + this.logger.error(`Exiting due to critical error...`) + process.exit(1) } + throw new Error(`Upload failed: ${e.toString()}`) } public async execute(): Promise { @@ -162,9 +208,9 @@ await this.archivesTrackingService.track({ name: this.objectKey, dataObjectIds: dataObjectIds }) await this.logUploadStats(startTime) await this.cleanup(dataObjectIds) - logger.info(`${this.archiveFilePath} successfully uploaded to S3!`) + this.logger.info(`Successfully uploaded to S3!`) } catch (e) { - logger.error(`Upload job failed for ${this.archiveFilePath}: ${e.toString()}`) + await this.handleFailure(e, dataObjectIds) } } } @@ -182,6 +228,7 @@ private uploadsDirectory: string, private dataObjectIds: string[], private archivesTrackingService: ArchivesTrackingService, + private objectTrackingService: ObjectTrackingService, private connectionHandler: IConnectionHandler, private compressionService: CompressionService, private statsCollectingService: StatsCollectingService ) { @@ -190,7 +237,8 @@ this.uploadsDirectory, this.dataObjectIds, this.compressionService, - this.statsCollectingService + this.statsCollectingService, + this.objectTrackingService ) this.archiveFilePath = this.compressTask.getArchiveFilePath() this.archiveFileName = path.basename(this.archiveFilePath) @@ -199,6 +247,7 @@ this.archiveFileName, this.uploadsDirectory, this.archivesTrackingService, + this.objectTrackingService, this.connectionHandler, this.compressionService, this.statsCollectingService, 
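Two patterns recur in the reworked tasks above: each task gets a labelled winston child logger, and each failure path untracks the affected data objects (so they can be re-downloaded later) and removes local leftovers, escalating to `process.exit(1)` only when the cleanup itself fails. Below is a minimal sketch of both patterns, assuming a stand-in winston root logger; the helper names are illustrative and not part of the patch (the real services live in `services/logger` and `services/archive/tracking`).

```typescript
import winston, { Logger } from 'winston'

// Stand-in root logger; Colossus builds its own via services/logger.
const rootLogger: Logger = winston.createLogger({
  level: 'debug',
  transports: [new winston.transports.Console()],
})

// Labelled child logger per task instance, as in CompressFilesTask / UploadArchiveFileTask.
function taskLogger(label: string): Logger {
  return rootLogger.child({ label })
}

// Sketch of the shared failure-handling contract (helper names are illustrative):
// untrack objects so they get re-downloaded, remove local leftovers, and only
// hard-exit when even the cleanup fails, since local state may then be inconsistent.
async function handleTaskFailure(
  logger: Logger,
  error: Error,
  untrackObjects: () => Promise<void>,
  removeLeftoverFiles: () => Promise<void>
): Promise<never> {
  try {
    await untrackObjects()
    await removeLeftoverFiles()
  } catch (cleanupError) {
    logger.error(`Failed to clean up local data: ${String(cleanupError)}`)
    logger.error('Exiting due to critical error...')
    process.exit(1)
  }
  logger.error(`Task failed: ${error.toString()}`)
  throw error
}

// Usage sketch:
const logger = taskLogger('UploadArchiveFileTask (example-key)')
logger.info('Uploading...')
```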
diff --git a/storage-node/src/services/s3/AwsConnectionHandler.ts b/storage-node/src/services/s3/AwsConnectionHandler.ts index d4db01070e..d3f2b0f89a 100644 --- a/storage-node/src/services/s3/AwsConnectionHandler.ts +++ b/storage-node/src/services/s3/AwsConnectionHandler.ts @@ -54,6 +54,10 @@ export class AwsConnectionHandler implements IConnectionHandler { return new S3Client({ credentials: fromEnv(), region: opts.region, + requestHandler: { + connectionTimeout: 60_000, + requestTimeout: 60_000, + }, }) } From c550c0eaad11176ab2a6d040218cbeb27984dfe9 Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Fri, 15 Nov 2024 23:08:29 +0100 Subject: [PATCH 13/19] Fix failure handling and adjust timeouts --- storage-node/src/services/archive/tasks.ts | 16 ++++++++++------ storage-node/src/services/archive/tracking.ts | 5 ++--- .../src/services/s3/AwsConnectionHandler.ts | 4 ++-- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/storage-node/src/services/archive/tasks.ts b/storage-node/src/services/archive/tasks.ts index 95c694a9b9..b1f0e2832b 100644 --- a/storage-node/src/services/archive/tasks.ts +++ b/storage-node/src/services/archive/tasks.ts @@ -91,12 +91,14 @@ export class CompressFilesTask implements Task { } } - private async handleFailure(e: Error): Promise { + private async handleFailure(error: Error): Promise { const pathsToClean = [this.tmpArchiveFilePath, this.archiveFilePath, ...this.dataObjectPaths] // Untrack data objects so that they can be re-downloaded // and remove data objects and any archives that were created from uploadsDir try { - await Promise.all(this.dataObjectIds.map((id) => this.objectTrackingService.untrack(id))) + for (const id of this.dataObjectIds) { + await this.objectTrackingService.untrack(id) + } await Promise.all(pathsToClean.map((p) => fsp.rm(p, { force: true }))) } catch (e) { this.logger.error(`Compression failed: ${e.toString()}`) @@ -104,7 +106,7 @@ export class CompressFilesTask implements Task { this.logger.error(`Exiting due to cirtical error...`) process.exit(1) } - throw new Error(`Compression task failed: ${e.toString()}`) + throw new Error(`Compression task failed: ${error.toString()}`) } private async clenaup(): Promise { @@ -186,10 +188,12 @@ export class UploadArchiveFileTask implements Task { } } - private async handleFailure(e: Error, dataObjectIds: string[]): Promise { + private async handleFailure(error: Error, dataObjectIds: string[]): Promise { // Untrack the data objects so that they can be re-downloaded and remove the archive file try { - await Promise.all(dataObjectIds.map((id) => this.objectTrackingService.untrack(id))) + for (const id of dataObjectIds) { + await this.objectTrackingService.untrack(id) + } await fsp.rm(this.archiveFilePath, { force: true }) } catch (e) { this.logger.error(`Upload failed: ${e.toString()}`) @@ -197,7 +201,7 @@ export class UploadArchiveFileTask implements Task { this.logger.error(`Exiting due to cirtical error...`) process.exit(1) } - throw new Error(`Upload failed: ${e.toString()}`) + throw new Error(`Upload failed: ${error.toString()}`) } public async execute(): Promise { diff --git a/storage-node/src/services/archive/tracking.ts b/storage-node/src/services/archive/tracking.ts index 9667da717f..8ce734a01b 100644 --- a/storage-node/src/services/archive/tracking.ts +++ b/storage-node/src/services/archive/tracking.ts @@ -17,9 +17,8 @@ abstract class TrackfileService { // Source: https://www.npmjs.com/package/retry retries: { minTimeout: 10, - maxTimeout: 100, - factor: 1.5, - retries: 10, + 
maxTimeout: 10, + retries: 10_000, }, }) } diff --git a/storage-node/src/services/s3/AwsConnectionHandler.ts b/storage-node/src/services/s3/AwsConnectionHandler.ts index d3f2b0f89a..f15f7aa1e0 100644 --- a/storage-node/src/services/s3/AwsConnectionHandler.ts +++ b/storage-node/src/services/s3/AwsConnectionHandler.ts @@ -55,8 +55,8 @@ export class AwsConnectionHandler implements IConnectionHandler { credentials: fromEnv(), region: opts.region, requestHandler: { - connectionTimeout: 60_000, - requestTimeout: 60_000, + connectionTimeout: 30_000, + requestTimeout: 120_000, }, }) } From dff104024f6d0e8b973f1afeefd2f1c4a0da7544 Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Sat, 16 Nov 2024 00:22:17 +0100 Subject: [PATCH 14/19] Better logs --- storage-node/src/services/archive/tasks.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/storage-node/src/services/archive/tasks.ts b/storage-node/src/services/archive/tasks.ts index b1f0e2832b..fe7d43f40f 100644 --- a/storage-node/src/services/archive/tasks.ts +++ b/storage-node/src/services/archive/tasks.ts @@ -106,7 +106,8 @@ export class CompressFilesTask implements Task { this.logger.error(`Exiting due to cirtical error...`) process.exit(1) } - throw new Error(`Compression task failed: ${error.toString()}`) + this.logger.error(`Compression task failed: ${error.toString()}`) + throw error } private async clenaup(): Promise { @@ -201,7 +202,8 @@ export class UploadArchiveFileTask implements Task { this.logger.error(`Exiting due to cirtical error...`) process.exit(1) } - throw new Error(`Upload failed: ${error.toString()}`) + this.logger.error(`Upload failed: ${error.toString()}`) + throw error } public async execute(): Promise { From f76e84d8e5090c0e00c92e23024db7d423c02f1a Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Tue, 19 Nov 2024 12:24:50 +0100 Subject: [PATCH 15/19] Fix: Destroy fileStream after failed upload --- storage-node/src/services/s3/AwsConnectionHandler.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/storage-node/src/services/s3/AwsConnectionHandler.ts b/storage-node/src/services/s3/AwsConnectionHandler.ts index f15f7aa1e0..caa4314fce 100644 --- a/storage-node/src/services/s3/AwsConnectionHandler.ts +++ b/storage-node/src/services/s3/AwsConnectionHandler.ts @@ -139,7 +139,13 @@ export class AwsConnectionHandler implements IConnectionHandler { ? 
new CreateMultipartUploadCommand(input) : new PutObjectCommand(input) - return await this.client.send(command) + try { + const resp = await this.client.send(command) + return resp + } catch (e) { + fileStream.destroy() + throw e + } } async getRedirectUrlForObject(filename: string): Promise { From 53fbddfaaa34cb67f6c238c63cb286e97bace58e Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Thu, 21 Nov 2024 13:13:30 +0100 Subject: [PATCH 16/19] Always destroy fileStream after successful/failed upload --- storage-node/src/services/s3/AwsConnectionHandler.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/storage-node/src/services/s3/AwsConnectionHandler.ts b/storage-node/src/services/s3/AwsConnectionHandler.ts index caa4314fce..0e8e5231d7 100644 --- a/storage-node/src/services/s3/AwsConnectionHandler.ts +++ b/storage-node/src/services/s3/AwsConnectionHandler.ts @@ -142,9 +142,8 @@ export class AwsConnectionHandler implements IConnectionHandler { try { const resp = await this.client.send(command) return resp - } catch (e) { + } finally { fileStream.destroy() - throw e } } From 29a12fb34a6d6bf6fa60a69728c6c74cabefc5b3 Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Mon, 25 Nov 2024 19:43:45 +0100 Subject: [PATCH 17/19] Sync and cleanup rework --- .../src/services/archive/ArchiveService.ts | 71 +++--- storage-node/src/services/queryNode/api.ts | 129 ++++++---- .../queryNode/queries/queries.graphql | 76 +++++- .../src/services/sync/acceptPendingObjects.ts | 2 +- .../src/services/sync/cleanupService.ts | 114 +++++---- .../src/services/sync/storageObligations.ts | 230 ++++++++++++------ .../src/services/sync/synchronizer.ts | 113 +++------ .../services/webApi/controllers/stateApi.ts | 3 +- 8 files changed, 439 insertions(+), 299 deletions(-) diff --git a/storage-node/src/services/archive/ArchiveService.ts b/storage-node/src/services/archive/ArchiveService.ts index 9ed7dd1bd6..e3da432c7c 100644 --- a/storage-node/src/services/archive/ArchiveService.ts +++ b/storage-node/src/services/archive/ArchiveService.ts @@ -13,7 +13,7 @@ import { OBJECTS_TRACKING_FILENAME, } from './tracking' import { QueryNodeApi } from '../queryNode/api' -import { getStorageObligationsFromRuntime } from '../sync/storageObligations' +import { DataObjectDetailsLoader, getStorageObligationsFromRuntime } from '../sync/storageObligations' import { getDownloadTasks } from '../sync/synchronizer' import sleep from 'sleep-promise' import { Logger } from 'winston' @@ -369,40 +369,49 @@ export class ArchiveService { public async performSync(): Promise { const model = await getStorageObligationsFromRuntime(this.queryNodeApi) - const assignedObjects = model.dataObjects - const added = assignedObjects.filter((obj) => !this.objectTrackingService.isTracked(obj.id)) - added.sort((a, b) => parseInt(b.id) - parseInt(a.id)) + const assignedObjectsIds = await model.createAssignedObjectsIdsLoader(true).getAll() + const unsyncedIds = assignedObjectsIds + .filter((id) => !this.objectTrackingService.isTracked(id)) + .map((id) => parseInt(id)) + .sort((a, b) => a - b) - this.logger.info(`Sync - new objects: ${added.length}`) + this.logger.info(`Sync - new objects: ${unsyncedIds.length}`) - // Add new download tasks while the upload dir size limit allows - while (added.length) { - const uploadDirectorySize = await this.getUploadDirSize() - while (true) { - const object = added.pop() - if (!object) { - break - } - if (object.size + uploadDirectorySize + this.syncQueueObjectsSize > this.uploadDirSizeLimit) { - this.logger.debug( - `Waiting 
for some disk space to free ` + - `(upload_dir: ${uploadDirectorySize} / ${this.uploadDirSizeLimit}, ` + - `sync_q=${this.syncQueueObjectsSize}, obj_size=${object.size})... ` + // Sync objects in batches of 10_000 + for (const unsyncedIdsBatch of _.chunk(unsyncedIds, 10_000)) { + const objectsBatchLoader = new DataObjectDetailsLoader(this.queryNodeApi, { + by: 'ids', + ids: unsyncedIdsBatch.map((id) => id.toString()), + }) + const objectsBatch = await objectsBatchLoader.getAll() + // Add new download tasks while the upload dir size limit allows + while (objectsBatch.length) { + const uploadDirectorySize = await this.getUploadDirSize() + while (true) { + const object = objectsBatch.pop() + if (!object) { + break + } + if (object.size + uploadDirectorySize + this.syncQueueObjectsSize > this.uploadDirSizeLimit) { + this.logger.debug( + `Waiting for some disk space to free ` + + `(upload_dir: ${uploadDirectorySize} / ${this.uploadDirSizeLimit}, ` + + `sync_q=${this.syncQueueObjectsSize}, obj_size=${object.size})... ` + ) + objectsBatch.push(object) + await sleep(60_000) + break + } + const [downloadTask] = await getDownloadTasks( + model, + [object], + this.uploadQueueDir, + this.tmpDownloadDir, + this.syncWorkersTimeout, + this.hostId ) - added.push(object) - await sleep(60_000) - break + await this.addDownloadTask(downloadTask, object.size) } - const [downloadTask] = await getDownloadTasks( - model, - [], - [object], - this.uploadQueueDir, - this.tmpDownloadDir, - this.syncWorkersTimeout, - this.hostId - ) - await this.addDownloadTask(downloadTask, object.size) } } } diff --git a/storage-node/src/services/queryNode/api.ts b/storage-node/src/services/queryNode/api.ts index 8fb896dafa..afafd65b2b 100644 --- a/storage-node/src/services/queryNode/api.ts +++ b/storage-node/src/services/queryNode/api.ts @@ -4,20 +4,26 @@ import fetch from 'cross-fetch' import stringify from 'fast-safe-stringify' import logger from '../logger' import { - DataObjectByBagIdsDetailsFragment, DataObjectDetailsFragment, + DataObjectIdsByBagId, + DataObjectIdsByBagIdQuery, + DataObjectIdsByBagIdQueryVariables, + DataObjectsByBagsConnection, + DataObjectsByBagsConnectionQuery, + DataObjectsByBagsConnectionQueryVariables, + DataObjectsByIdsConnection, + DataObjectsByIdsConnectionQuery, + DataObjectsByIdsConnectionQueryVariables, + DataObjectsWithBagDetailsByIds, + DataObjectsWithBagDetailsByIdsQuery, + DataObjectsWithBagDetailsByIdsQueryVariables, + DataObjectWithBagDetailsFragment, GetAllStorageBagDetails, GetAllStorageBagDetailsQuery, GetAllStorageBagDetailsQueryVariables, - GetDataObjects, - GetDataObjectsByBagIds, - GetDataObjectsByBagIdsQuery, - GetDataObjectsByBagIdsQueryVariables, GetDataObjectsDeletedEvents, GetDataObjectsDeletedEventsQuery, GetDataObjectsDeletedEventsQueryVariables, - GetDataObjectsQuery, - GetDataObjectsQueryVariables, GetSquidVersion, GetSquidVersionQuery, GetSquidVersionQueryVariables, @@ -41,7 +47,7 @@ import { StorageBucketDetailsFragment, StorageBucketIdsFragment, } from './generated/queries' -import { Maybe, StorageBagWhereInput } from './generated/schema' +import { Maybe } from './generated/schema' /** * Defines query paging limits. @@ -53,7 +59,7 @@ type PaginationQueryVariables = { lastCursor?: Maybe } -type PaginationQueryResult = { +export type PaginationQueryResult = { edges: { node: T }[] pageInfo: { hasNextPage: boolean @@ -249,50 +255,87 @@ export class QueryNodeApi { } /** - * Returns data objects info by pages for the given bags. 
+ * Gets a page of data objects belonging to specified bags. * * @param bagIds - query filter: bag IDs */ - public async getDataObjectsByBagIds(bagIds: string[]): Promise> { - const allBagIds = [...bagIds] // Copy to avoid modifying the original array - let fullResult: DataObjectByBagIdsDetailsFragment[] = [] - while (allBagIds.length) { - const bagIdsBatch = allBagIds.splice(0, 1000) - const input: StorageBagWhereInput = { id_in: bagIdsBatch } - fullResult = [ - ...fullResult, - ...(await this.multipleEntitiesQuery( - GetDataObjectsByBagIds, - { bagIds: input }, - 'storageDataObjects' - )), - ] - } + public async getDataObjectsByBagsPage( + bagIds: string[], + limit: number, + after: string | undefined, + includeDetails: IncludeDetails, + isAccepted?: boolean + ): Promise< + IncludeDetails extends true + ? PaginationQueryResult | null + : PaginationQueryResult<{ id: string }> | null + > { + return this.uniqueEntityQuery( + DataObjectsByBagsConnection, + { + bagIds: [...bagIds], + isAccepted, + limit, + after, + includeDetails: includeDetails, + }, + 'storageDataObjectsConnection' + ) + } - return fullResult + /** + * Gets a page of data objects by the given list of dataObject IDs. + * + * @param ids - query filter: data object ids + */ + public async getDataObjectsByIdsPage( + ids: string[], + limit: number, + after: string | undefined, + includeDetails: IncludeDetails, + isAccepted?: boolean + ): Promise< + IncludeDetails extends true + ? PaginationQueryResult | null + : PaginationQueryResult<{ id: string }> | null + > { + return this.uniqueEntityQuery( + DataObjectsByIdsConnection, + { + ids: [...ids], + isAccepted, + limit, + after, + includeDetails: includeDetails, + }, + 'storageDataObjectsConnection' + ) } /** - * Returns data objects info by pages for the given dataObject IDs. + * Returns a list of data objects by ids, with their corresponding bag details * - * @param dataObjectIds - query filter: dataObject IDs + * @param ids - query filter: data object ids */ - public async getDataObjectDetails(dataObjectIds: string[]): Promise> { - const allDataObjectIds = [...dataObjectIds] // Copy to avoid modifying the original array - let fullResult: DataObjectDetailsFragment[] = [] - while (allDataObjectIds.length) { - const dataObjectIdsBatch = allDataObjectIds.splice(0, 1000) - fullResult = [ - ...fullResult, - ...(await this.multipleEntitiesQuery( - GetDataObjects, - { dataObjectIds: dataObjectIdsBatch }, - 'storageDataObjects' - )), - ] - } + public async getDataObjectsWithBagDetails(ids: string[]): Promise { + return this.multipleEntitiesQuery< + DataObjectsWithBagDetailsByIdsQuery, + DataObjectsWithBagDetailsByIdsQueryVariables + >(DataObjectsWithBagDetailsByIds, { ids: [...ids] }, 'storageDataObjects') + } - return fullResult + /** + * Returns a list of data object ids that belong to a given bag. 
+ * + * @param bagId - query filter: bag ID + */ + public async getDataObjectIdsByBagId(bagId: string): Promise { + const result = await this.multipleEntitiesQuery( + DataObjectIdsByBagId, + { bagId }, + 'storageDataObjects' + ) + return result.map((o) => o.id) } /** diff --git a/storage-node/src/services/queryNode/queries/queries.graphql b/storage-node/src/services/queryNode/queries/queries.graphql index b29c1c51df..32851a9051 100644 --- a/storage-node/src/services/queryNode/queries/queries.graphql +++ b/storage-node/src/services/queryNode/queries/queries.graphql @@ -59,7 +59,13 @@ query getAllStorageBagDetails { } } -fragment DataObjectByBagIdsDetails on StorageDataObject { +query dataObjectIdsByBagId($bagId: String) { + storageDataObjects(where: { storageBag: { id_eq: $bagId } }) { + id + } +} + +fragment DataObjectDetails on StorageDataObject { id size ipfsHash @@ -68,13 +74,7 @@ fragment DataObjectByBagIdsDetails on StorageDataObject { } } -query getDataObjectsByBagIds($bagIds: StorageBagWhereInput) { - storageDataObjects(where: { storageBag: $bagIds, isAccepted_eq: true }) { - ...DataObjectByBagIdsDetails - } -} - -fragment DataObjectDetails on StorageDataObject { +fragment DataObjectWithBagDetails on StorageDataObject { id isAccepted ipfsHash @@ -83,9 +83,63 @@ fragment DataObjectDetails on StorageDataObject { } } -query getDataObjects($dataObjectIds: [String!]) { - storageDataObjects(where: { id_in: $dataObjectIds }) { - ...DataObjectDetails +query dataObjectsByBagsConnection( + $bagIds: [String!] + $limit: Int + $after: String + $includeDetails: Boolean! + $isAccepted: Boolean +) { + storageDataObjectsConnection( + where: { storageBag: { id_in: $bagIds }, isAccepted_eq: $isAccepted } + first: $limit + after: $after + orderBy: id_ASC + ) { + edges { + node { + id + ...DataObjectDetails @include(if: $includeDetails) + } + } + pageInfo { + startCursor + endCursor + hasNextPage + } + } +} + +query dataObjectsByIdsConnection( + $ids: [String!] + $limit: Int + $after: String + $includeDetails: Boolean! 
+ $isAccepted: Boolean +) { + storageDataObjectsConnection( + where: { id_in: $ids, isAccepted_eq: $isAccepted } + first: $limit + after: $after + orderBy: id_ASC + ) { + edges { + node { + id + ...DataObjectDetails @include(if: $includeDetails) + } + } + pageInfo { + startCursor + endCursor + hasNextPage + } + } +} + +query dataObjectsWithBagDetailsByIds($ids: [String!]) { + storageDataObjects(where: { id_in: $ids }) { + ...DataObjectWithBagDetails } } diff --git a/storage-node/src/services/sync/acceptPendingObjects.ts b/storage-node/src/services/sync/acceptPendingObjects.ts index 170498688d..7b8f1db4b8 100644 --- a/storage-node/src/services/sync/acceptPendingObjects.ts +++ b/storage-node/src/services/sync/acceptPendingObjects.ts @@ -91,7 +91,7 @@ export class AcceptPendingObjectsService { } private async processPendingObjects(pendingIds: string[]): Promise { - const pendingDataObjects = await this.qnApi.getDataObjectDetails(pendingIds) + const pendingDataObjects = await this.qnApi.getDataObjectsWithBagDetails(pendingIds) // objects not found in the query node const maybeDeletedObjectIds = pendingIds.filter( diff --git a/storage-node/src/services/sync/cleanupService.ts b/storage-node/src/services/sync/cleanupService.ts index bfc99e54e6..fdaf4f5f2f 100644 --- a/storage-node/src/services/sync/cleanupService.ts +++ b/storage-node/src/services/sync/cleanupService.ts @@ -3,12 +3,13 @@ import _ from 'lodash' import superagent from 'superagent' import urljoin from 'url-join' import { getDataObjectIDs } from '../../services/caching/localDataObjects' -import logger from '../../services/logger' +import rootLogger from '../../services/logger' import { QueryNodeApi } from '../queryNode/api' -import { DataObjectDetailsFragment } from '../queryNode/generated/queries' -import { DataObligations, getDataObjectsByIDs, getStorageObligationsFromRuntime } from './storageObligations' +import { DataObjectIdsLoader, DataObligations, getStorageObligationsFromRuntime } from './storageObligations' import { DeleteLocalFileTask } from './tasks' import { TaskProcessorSpawner, WorkingStack } from '../processing/workingProcess' +import { DataObjectWithBagDetailsFragment } from '../queryNode/generated/queries' +import { Logger } from 'winston' /** * The maximum allowed threshold by which the QN processor can lag behind @@ -43,7 +44,7 @@ export const MINIMUM_REPLICATION_THRESHOLD = parseInt(process.env.CLEANUP_MIN_RE * @param api - (optional) runtime API promise * @param workerId - current storage provider ID * @param buckets - Selected storage buckets - * @param asyncWorkersNumber - maximum parallel downloads number + * @param asyncWorkersNumber - maximum parallel cleanups number * @param asyncWorkersTimeout - downloading asset timeout * @param qnApi - Query Node API * @param uploadDirectory - local directory to get file names from @@ -57,6 +58,7 @@ export async function performCleanup( uploadDirectory: string, hostId: string ): Promise { + const logger = rootLogger.child({ label: 'Cleanup' }) logger.info('Started cleanup service...') const squidStatus = await qnApi.getState() if (!squidStatus || !squidStatus.height) { @@ -77,89 +79,93 @@ export async function performCleanup( const model = await getStorageObligationsFromRuntime(qnApi, buckets) const storedObjectsIds = getDataObjectIDs() - const assignedObjectsIds = model.dataObjects.map((obj) => obj.id) - const removedIds = _.difference(storedObjectsIds, assignedObjectsIds) - const removedObjects = await getDataObjectsByIDs(qnApi, removedIds) + const 
assignedObjectsLoader = model.createAssignedObjectsIdsLoader() + const assignedObjectIds = new Set(await assignedObjectsLoader.getAll()) + const obsoleteObjectIds = new Set(storedObjectsIds.filter((id) => !assignedObjectIds.has(id))) - logger.debug(`Cleanup - stored objects: ${storedObjectsIds.length}, assigned objects: ${assignedObjectsIds.length}`) - logger.debug(`Cleanup - pruning ${removedIds.length} obsolete objects`) + // If objects are obsolete but still exist: They are "moved" objects + const movedObjectsLoader = new DataObjectIdsLoader(qnApi, { by: 'ids', ids: Array.from(obsoleteObjectIds) }) + const movedObjectIds = new Set(await movedObjectsLoader.getAll()) - // Data objects permanently deleted from the runtime - const deletedDataObjects = removedIds.filter( - (removedId) => !removedObjects.some((removedObject) => removedObject.id === removedId) - ) + // If objects are obsolete and don't exist: They are "deleted objects" + const deletedDataObjectIds = new Set([...obsoleteObjectIds].filter((id) => !movedObjectIds.has(id))) - // Data objects no-longer assigned to current storage-node - // operated buckets, and have been moved to other buckets - const movedDataObjects = removedObjects + logger.info(`stored objects: ${storedObjectsIds.length}, assigned objects: ${assignedObjectIds.size}`) + logger.info( + `pruning ${obsoleteObjectIds.size} obsolete objects ` + + `(${movedObjectIds.size} moved, ${deletedDataObjectIds.size} deleted)` + ) const workingStack = new WorkingStack() const processSpawner = new TaskProcessorSpawner(workingStack, asyncWorkersNumber) - const deletionTasksOfDeletedDataObjects = await Promise.all( - deletedDataObjects.map((dataObject) => new DeleteLocalFileTask(uploadDirectory, dataObject)) - ) - const deletionTasksOfMovedDataObjects = await getDeletionTasksFromMovedDataObjects( - buckets, - uploadDirectory, - model, - movedDataObjects, - hostId - ) + // Execute deleted objects removal tasks in batches of 10_000 + let deletedProcessed = 0 + logger.info(`removing ${deletedDataObjectIds.size} deleted objects...`) + for (const deletedObjectsIdsBatch of _.chunk([...deletedDataObjectIds], 10_000)) { + const deletionTasks = deletedObjectsIdsBatch.map((id) => new DeleteLocalFileTask(uploadDirectory, id)) + await workingStack.add(deletionTasks) + await processSpawner.process() + deletedProcessed += deletedObjectsIdsBatch.length + logger.debug(`${deletedProcessed} / ${deletedDataObjectIds.size} deleted objects processed...`) + } + + // Execute moved objects removal tasks in batches of 10_000 + let movedProcessed = 0 + logger.info(`removing ${movedObjectIds.size} moved objects...`) + for (const movedObjectsIdsBatch of _.chunk([...movedObjectIds], 10_000)) { + const movedDataObjectsBatch = await qnApi.getDataObjectsWithBagDetails(movedObjectsIdsBatch) + const deletionTasksOfMovedDataObjects = await getDeletionTasksFromMovedDataObjects( + logger, + uploadDirectory, + model, + movedDataObjectsBatch, + hostId + ) + await workingStack.add(deletionTasksOfMovedDataObjects) + await processSpawner.process() + movedProcessed += movedDataObjectsBatch.length + logger.debug(`${movedProcessed} / ${movedObjectIds.size} moved objects processed...`) + } - await workingStack.add(deletionTasksOfDeletedDataObjects) - await workingStack.add(deletionTasksOfMovedDataObjects) - await processSpawner.process() logger.info('Cleanup ended.') } /** * Creates the local file deletion tasks. 
* - * @param ownBuckets - list of bucket ids operated by this node + * @param logger - cleanup service logger * @param uploadDirectory - local directory for data uploading * @param dataObligations - defines the current data obligations for the node * @param movedDataObjects- obsolete (no longer assigned) data objects that has been moved to other buckets + * @param hostId - host id of the current node */ async function getDeletionTasksFromMovedDataObjects( - ownBuckets: string[], + logger: Logger, uploadDirectory: string, dataObligations: DataObligations, - movedDataObjects: DataObjectDetailsFragment[], + movedDataObjects: DataObjectWithBagDetailsFragment[], hostId: string ): Promise { - const ownOperatorUrls: string[] = [] - for (const entry of dataObligations.storageBuckets) { - if (ownBuckets.includes(entry.id)) { - ownOperatorUrls.push(entry.operatorUrl) - } - } - - const bucketOperatorUrlById = new Map() - for (const entry of dataObligations.storageBuckets) { - if (!ownBuckets.includes(entry.id)) { - if (ownOperatorUrls.includes(entry.operatorUrl)) { - logger.warn(`(cleanup) Skipping remote bucket ${entry.id} - ${entry.operatorUrl}`) - } else { - bucketOperatorUrlById.set(entry.id, entry.operatorUrl) - } - } - } - const timeoutMs = 60 * 1000 // 1 minute since it's only a HEAD request const deletionTasks: DeleteLocalFileTask[] = [] + + const { bucketOperatorUrlById } = dataObligations await Promise.allSettled( movedDataObjects.map(async (movedDataObject) => { let dataObjectReplicationCount = 0 for (const { storageBucket } of movedDataObject.storageBag.storageBuckets) { - const url = urljoin(bucketOperatorUrlById.get(storageBucket.id), 'api/v1/files', movedDataObject.id) - await superagent.head(url).timeout(timeoutMs).set('X-COLOSSUS-HOST-ID', hostId) - dataObjectReplicationCount++ + const nodeUrl = bucketOperatorUrlById.get(storageBucket.id) + if (nodeUrl) { + const fileUrl = urljoin(nodeUrl, 'api/v1/files', movedDataObject.id) + await superagent.head(fileUrl).timeout(timeoutMs).set('X-COLOSSUS-HOST-ID', hostId) + dataObjectReplicationCount++ + } } if (dataObjectReplicationCount < MINIMUM_REPLICATION_THRESHOLD) { - logger.warn(`Cleanup - data object replication threshold unmet - file deletion canceled: ${movedDataObject.id}`) + logger.warn(`data object replication threshold unmet - file deletion canceled: ${movedDataObject.id}`) return } diff --git a/storage-node/src/services/sync/storageObligations.ts b/storage-node/src/services/sync/storageObligations.ts index 58f6b75ac6..c9369b9d42 100644 --- a/storage-node/src/services/sync/storageObligations.ts +++ b/storage-node/src/services/sync/storageObligations.ts @@ -1,12 +1,7 @@ import _ from 'lodash' import logger from '../logger' -import { MAX_RESULTS_PER_QUERY, QueryNodeApi } from '../queryNode/api' -import { - DataObjectByBagIdsDetailsFragment, - DataObjectDetailsFragment, - StorageBagDetailsFragment, - StorageBucketDetailsFragment, -} from '../queryNode/generated/queries' +import { MAX_RESULTS_PER_QUERY, PaginationQueryResult, QueryNodeApi } from '../queryNode/api' +import { DataObjectDetailsFragment, StorageBucketDetailsFragment } from '../queryNode/generated/queries' import { ApiPromise } from '@polkadot/api' import { PalletStorageStorageBucketRecord } from '@polkadot/types/lookup' @@ -25,9 +20,19 @@ export type DataObligations = { bags: Bag[] /** - * Assigned data objects for the storage provider. + * Map from bucket id to storage node url, without own buckets. 
*/ - dataObjects: DataObject[] + bucketOperatorUrlById: Map + + /** + * Map from assigned bag ids to storage node urls. + */ + bagOperatorsUrlsById: Map + + /** + * A function that returns a loader of all assigned data object ids + */ + createAssignedObjectsIdsLoader(isAccepted?: boolean): DataObjectIdsLoader } /** @@ -90,6 +95,93 @@ export type DataObject = { size: number } +export abstract class LazyBatchLoader, MappedEntity> { + private endCursor: string | undefined + private _hasNextPage: boolean + private queryFn: (limit: number, after?: string) => Promise + + constructor(queryFn: (limit: number, after?: string) => Promise) { + this.queryFn = queryFn + this._hasNextPage = true + } + + public get hasNextPage(): boolean { + return this._hasNextPage + } + + abstract mapResults(results: QueryResult['edges'][number]['node'][]): Promise + + async nextBatch(size = 10_000): Promise { + if (!this._hasNextPage) { + return null + } + const result = await this.queryFn(size, this.endCursor) + if (!result) { + throw new Error('Connection query returned empty result') + } + + this.endCursor = result.pageInfo.endCursor || undefined + this._hasNextPage = result.pageInfo.hasNextPage + const mapped = await this.mapResults(result.edges.map((e) => e.node)) + return mapped + } + + async getAll(): Promise { + const results: MappedEntity[] = [] + while (this._hasNextPage) { + const batch = await this.nextBatch() + if (!batch) { + break + } + results.push(...batch) + } + + return results + } +} + +type DataObjectsLoadBy = { by: 'bagIds' | 'ids'; ids: string[]; isAccepted?: boolean } + +export class DataObjectDetailsLoader extends LazyBatchLoader< + PaginationQueryResult, + DataObject +> { + constructor(qnApi: QueryNodeApi, by: DataObjectsLoadBy) { + if (by.by === 'bagIds') { + super((limit, after) => qnApi.getDataObjectsByBagsPage(by.ids, limit, after, true, by.isAccepted)) + } else if (by.by === 'ids') { + super((limit, after) => qnApi.getDataObjectsByIdsPage(by.ids, limit, after, true, by.isAccepted)) + } else { + throw new Error(`Unknown "by" condition: ${JSON.stringify(by)}`) + } + } + + async mapResults(results: DataObjectDetailsFragment[]): Promise { + return results.map((dataObject) => ({ + id: dataObject.id, + size: parseInt(dataObject.size), + bagId: dataObject.storageBag.id, + ipfsHash: dataObject.ipfsHash, + })) + } +} + +export class DataObjectIdsLoader extends LazyBatchLoader, string> { + constructor(qnApi: QueryNodeApi, by: DataObjectsLoadBy) { + if (by.by === 'bagIds') { + super((limit, after) => qnApi.getDataObjectsByBagsPage(by.ids, limit, after, false, by.isAccepted)) + } else if (by.by === 'ids') { + super((limit, after) => qnApi.getDataObjectsByIdsPage(by.ids, limit, after, false, by.isAccepted)) + } else { + throw new Error(`Unknown "by" condition: ${JSON.stringify(by)}`) + } + } + + async mapResults(results: { id: string }[]): Promise { + return results.map(({ id }) => id) + } +} + /** * Get storage provider obligations like (assigned data objects) from the * runtime (Query Node). @@ -102,30 +194,58 @@ export async function getStorageObligationsFromRuntime( qnApi: QueryNodeApi, bucketIds?: string[] ): Promise { - const allBuckets = await getAllBuckets(qnApi) + const storageBuckets = (await getAllBuckets(qnApi)).map((bucket) => ({ + id: bucket.id, + operatorUrl: bucket.operatorMetadata?.nodeEndpoint ?? '', + workerId: bucket.operatorStatus?.workerId, + })) + + const bags = ( + bucketIds === undefined ? 
await qnApi.getAllStorageBagsDetails() : await qnApi.getStorageBagsDetails(bucketIds) + ).map((bag) => ({ + id: bag.id, + buckets: bag.storageBuckets.map((bucketInBag) => bucketInBag.storageBucket.id), + })) + + const ownBuckets = new Set(bucketIds || []) + const ownOperatorUrls = new Set() + for (const bucket of storageBuckets) { + if (ownBuckets.has(bucket.id)) { + ownOperatorUrls.add(bucket.operatorUrl) + } + } - const assignedBags = - bucketIds === undefined ? await qnApi.getAllStorageBagsDetails() : await getAllAssignedBags(qnApi, bucketIds) + const bucketOperatorUrlById = new Map() + for (const bucket of storageBuckets) { + if (!ownBuckets.has(bucket.id)) { + if (ownOperatorUrls.has(bucket.operatorUrl)) { + logger.warn(`(sync) Skipping remote bucket ${bucket.id} - ${bucket.operatorUrl}`) + } else { + bucketOperatorUrlById.set(bucket.id, bucket.operatorUrl) + } + } + } + + const bagOperatorsUrlsById = new Map() + for (const bag of bags) { + const operatorUrls = [] + for (const bucketId of bag.buckets) { + const operatorUrl = bucketOperatorUrlById.get(bucketId) + if (operatorUrl) { + operatorUrls.push(operatorUrl) + } + } - const bagIds = assignedBags.map((bag) => bag.id) - const assignedDataObjects = await getAllAssignedDataObjects(qnApi, bagIds) + bagOperatorsUrlsById.set(bag.id, operatorUrls) + } const model: DataObligations = { - storageBuckets: allBuckets.map((bucket) => ({ - id: bucket.id, - operatorUrl: bucket.operatorMetadata?.nodeEndpoint ?? '', - workerId: bucket.operatorStatus?.workerId, - })), - bags: assignedBags.map((bag) => ({ - id: bag.id, - buckets: bag.storageBuckets.map((bucketInBag) => bucketInBag.storageBucket.id), - })), - dataObjects: assignedDataObjects.map((dataObject) => ({ - id: dataObject.id, - size: parseInt(dataObject.size), - bagId: dataObject.storageBag.id, - ipfsHash: dataObject.ipfsHash, - })), + storageBuckets, + bags, + bagOperatorsUrlsById, + bucketOperatorUrlById, + createAssignedObjectsIdsLoader: (isAccepted?: boolean) => + new DataObjectIdsLoader(qnApi, { by: 'bagIds', ids: bags.map((b) => b.id), isAccepted }), } return model @@ -145,19 +265,6 @@ export async function getStorageBucketIdsByWorkerId(qnApi: QueryNodeApi, workerI return ids } -/** - * Get IDs of the data objects assigned to the bag ID. - * - * @param qnApi - initialized QueryNodeApi instance - * @param bagId - bag ID - * @returns data object IDs - */ -export async function getDataObjectIDsByBagId(qnApi: QueryNodeApi, bagId: string): Promise { - const dataObjects = await getAllAssignedDataObjects(qnApi, [bagId]) - - return dataObjects.map((obj) => obj.id) -} - /** * Get all storage buckets registered in the runtime (Query Node). * @@ -179,45 +286,6 @@ async function getAllBuckets(api: QueryNodeApi): Promise { - return await api.getDataObjectsByBagIds(bagIds) -} - -/** - * Get details of storage data objects by IDs. - * - * @param api - initialized QueryNodeApi instance - * @param dataObjectIds - data objects' IDs - * @returns storage data objects - */ -export async function getDataObjectsByIDs( - api: QueryNodeApi, - dataObjectIds: string[] -): Promise { - return await api.getDataObjectDetails(dataObjectIds) -} - -/** - * Get all bags assigned to storage provider. 
- * - * @param api - initialiazed QueryNodeApi instance - * @param bucketIds - assigned storage provider buckets' IDs - * @returns storage bag data - */ -async function getAllAssignedBags(api: QueryNodeApi, bucketIds: string[]): Promise { - return await api.getStorageBagsDetails(bucketIds) -} - /** * Abstract object acquiring function for the QueryNode. It uses paging for * queries and gets data using record offset and limit (hardcoded to 1000). diff --git a/storage-node/src/services/sync/synchronizer.ts b/storage-node/src/services/sync/synchronizer.ts index a4a7f0a409..93118036bd 100644 --- a/storage-node/src/services/sync/synchronizer.ts +++ b/storage-node/src/services/sync/synchronizer.ts @@ -1,7 +1,12 @@ import { getDataObjectIDs, isDataObjectIdInCache } from '../../services/caching/localDataObjects' import logger from '../../services/logger' import { QueryNodeApi } from '../queryNode/api' -import { DataObligations, getStorageObligationsFromRuntime } from './storageObligations' +import { + DataObject, + DataObjectDetailsLoader, + DataObligations, + getStorageObligationsFromRuntime, +} from './storageObligations' import { DownloadFileTask } from './tasks' import { TaskProcessorSpawner, WorkingStack } from '../processing/workingProcess' import _ from 'lodash' @@ -48,35 +53,39 @@ export async function performSync( const model = await getStorageObligationsFromRuntime(qnApi, buckets) const storedObjectIds = getDataObjectIDs() - const assignedObjects = model.dataObjects - const assignedObjectIds = assignedObjects.map((obj) => obj.id) + const assignedObjectIdsLoader = model.createAssignedObjectsIdsLoader(true) + const assignedObjectIds = new Set(await assignedObjectIdsLoader.getAll()) - const added = assignedObjects.filter((obj) => !isDataObjectIdInCache(obj.id)) - const removed = _.difference(storedObjectIds, assignedObjectIds) + const unsyncedObjectIds = [...assignedObjectIds].filter((id) => !isDataObjectIdInCache(id)) + const obsoleteObjectsNum = storedObjectIds.reduce((count, id) => (assignedObjectIds.has(id) ? count : count + 1), 0) - logger.debug(`Sync - new objects: ${added.length}`) - logger.debug(`Sync - obsolete objects: ${removed.length}`) + logger.debug(`Sync - new objects: ${unsyncedObjectIds.length}`) + logger.debug(`Sync - obsolete objects: ${obsoleteObjectsNum}`) const workingStack = new WorkingStack() - - const addedTasks = await getDownloadTasks( - model, - buckets, - added, - uploadDirectory, - tempDirectory, - asyncWorkersTimeout, - hostId, - selectedOperatorUrl - ) - - logger.debug(`Sync - started processing...`) - const processSpawner = new TaskProcessorSpawner(workingStack, asyncWorkersNumber) - await workingStack.add(addedTasks) + // Process unsynced objects in batches of 10_000 + logger.debug(`Sync - started processing...`) + let processed = 0 + for (const unsyncedIdsBatch of _.chunk(unsyncedObjectIds, 10_000)) { + const objectsLoader = new DataObjectDetailsLoader(qnApi, { by: 'ids', ids: unsyncedIdsBatch }) + const objectsBatch = await objectsLoader.getAll() + const syncTasks = await getDownloadTasks( + model, + objectsBatch, + uploadDirectory, + tempDirectory, + asyncWorkersTimeout, + hostId, + selectedOperatorUrl + ) + await workingStack.add(syncTasks) + await processSpawner.process() + processed += objectsBatch.length + logger.debug(`Sync - processed ${processed} / ${unsyncedObjectIds.length} objects...`) + } - await processSpawner.process() logger.info('Sync ended.') } @@ -84,8 +93,7 @@ export async function performSync( * Creates the download tasks.
* * @param dataObligations - defines the current data obligations for the node - * @param ownBuckets - list of bucket ids operated this node - * @param addedIds - data object IDs to download + * @param dataObjects - list of data objects to download * @param uploadDirectory - local directory for data uploading * @param tempDirectory - local directory for temporary data uploading * @param taskSink - a destination for the newly created tasks @@ -95,65 +103,18 @@ export async function performSync( */ export async function getDownloadTasks( dataObligations: DataObligations, - ownBuckets: string[], - added: DataObligations['dataObjects'], + dataObjects: DataObject[], uploadDirectory: string, tempDirectory: string, asyncWorkersTimeout: number, hostId: string, selectedOperatorUrl?: string ): Promise { - const bagIdByDataObjectId = new Map() - for (const entry of dataObligations.dataObjects) { - bagIdByDataObjectId.set(entry.id, entry.bagId) - } - - const ownOperatorUrls: string[] = [] - for (const entry of dataObligations.storageBuckets) { - if (ownBuckets.includes(entry.id)) { - ownOperatorUrls.push(entry.operatorUrl) - } - } - - const bucketOperatorUrlById = new Map() - for (const entry of dataObligations.storageBuckets) { - if (!ownBuckets.includes(entry.id)) { - if (ownOperatorUrls.includes(entry.operatorUrl)) { - logger.warn(`(sync) Skipping remote bucket ${entry.id} - ${entry.operatorUrl}`) - } else { - bucketOperatorUrlById.set(entry.id, entry.operatorUrl) - } - } - } - - const bagOperatorsUrlsById = new Map() - for (const entry of dataObligations.bags) { - const operatorUrls = [] - - for (const bucket of entry.buckets) { - if (bucketOperatorUrlById.has(bucket)) { - const operatorUrl = bucketOperatorUrlById.get(bucket) - if (operatorUrl) { - operatorUrls.push(operatorUrl) - } - } - } - - bagOperatorsUrlsById.set(entry.id, operatorUrls) - } - - const tasks = added.map((dataObject) => { - let operatorUrls: string[] = [] // can be empty after look up - let bagId = null - if (bagIdByDataObjectId.has(dataObject.id)) { - bagId = bagIdByDataObjectId.get(dataObject.id) - if (bagOperatorsUrlsById.has(bagId)) { - operatorUrls = bagOperatorsUrlsById.get(bagId) - } - } + const { bagOperatorsUrlsById } = dataObligations + const tasks = dataObjects.map((dataObject) => { return new DownloadFileTask( - selectedOperatorUrl ? [selectedOperatorUrl] : operatorUrls, + selectedOperatorUrl ? 
[selectedOperatorUrl] : bagOperatorsUrlsById.get(dataObject.bagId) || [], dataObject.id, dataObject.ipfsHash, dataObject.size, diff --git a/storage-node/src/services/webApi/controllers/stateApi.ts b/storage-node/src/services/webApi/controllers/stateApi.ts index 10273719f4..ea49a6d58c 100644 --- a/storage-node/src/services/webApi/controllers/stateApi.ts +++ b/storage-node/src/services/webApi/controllers/stateApi.ts @@ -8,7 +8,6 @@ import { promisify } from 'util' import { getDataObjectIDs } from '../../../services/caching/localDataObjects' import logger from '../../logger' import { QueryNodeApi } from '../../queryNode/api' -import { getDataObjectIDsByBagId } from '../../sync/storageObligations' import { DataObjectResponse, DataStatsResponse, @@ -168,7 +167,7 @@ async function getCachedDataObjectsObligations(qnApi: QueryNodeApi, bagId: strin const entryName = `data_object_obligations_${bagId}` if (!dataCache.has(entryName)) { - const data = await getDataObjectIDsByBagId(qnApi, bagId) + const data = await qnApi.getDataObjectIdsByBagId(bagId) dataCache.set(entryName, data) } From 0094a618f60afc2588c0e252a583c468e9b40a53 Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Tue, 26 Nov 2024 09:46:22 +0100 Subject: [PATCH 18/19] Update changelog --- storage-node/CHANGELOG.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/storage-node/CHANGELOG.md b/storage-node/CHANGELOG.md index da23df9c92..a3cf96681f 100644 --- a/storage-node/CHANGELOG.md +++ b/storage-node/CHANGELOG.md @@ -1,6 +1,12 @@ ### 4.3.0 -- Adds `archive` mode / command, which allows downloading, compressing and uploading all data objects to an external S3 bucket that can be used as a backup. +- **New feature:** `archive` mode / command, which allows downloading, compressing and uploading all data objects to an external S3 bucket that can be used as a backup. +- **Optimizations:** The way data objects / data object ids are queried and processed during sync and cleanup has been optimized: + - `DataObjectDetailsLoader` and `DataObjectIdsLoader` were implemented. They allow loading data objects / data object ids in batches using a connection query and avoid fetching redundant data from the GraphQL server. + - Sync and cleanup services now process tasks in batches of `10_000` to avoid overflowing the memory. + - Synchronous operations like `sort` or `filter` on larger arrays of data objects have been optimized (for example, by replacing `.filter(Array.includes(...))` with `.filter(Set.has(...))`) +- Improved logging during cleanup + ### 4.2.0 From 2520e15017ca4ba5030d93bbf1c1497744900def Mon Sep 17 00:00:00 2001 From: Lezek123 Date: Tue, 26 Nov 2024 10:16:41 +0100 Subject: [PATCH 19/19] Colossus cleanup: Additional safety mechanism --- storage-node/CHANGELOG.md | 5 +- .../src/services/sync/cleanupService.ts | 87 +++++++++++-------- 2 files changed, 56 insertions(+), 36 deletions(-) diff --git a/storage-node/CHANGELOG.md b/storage-node/CHANGELOG.md index a3cf96681f..8982fb9db2 100644 --- a/storage-node/CHANGELOG.md +++ b/storage-node/CHANGELOG.md @@ -4,8 +4,9 @@ - **Optimizations:** The way data objects / data object ids are queried and processed during sync and cleanup has been optimized: - `DataObjectDetailsLoader` and `DataObjectIdsLoader` were implemented. They allow loading data objects / data object ids in batches using a connection query and avoid fetching redundant data from the GraphQL server. - Sync and cleanup services now process tasks in batches of `10_000` to avoid overflowing the memory. 
- - Synchronous operations like `sort` or `filter` on larger arrays of data objects have been optimized (for example, by replacing `.filter(Array.includes(...))` with `.filter(Set.has(...))`) -- Improved logging during cleanup + - Synchronous operations like `sort` or `filter` on larger arrays of data objects have been optimized (for example, by replacing `.filter(Array.includes(...))` with `.filter(Set.has(...))`). +- A safety mechanism was added to avoid removing "deleted" objects for which a related `DataObjectDeleted` event cannot be found in storage squid. +- Improved logging during cleanup. ### 4.2.0 diff --git a/storage-node/src/services/sync/cleanupService.ts b/storage-node/src/services/sync/cleanupService.ts index fdaf4f5f2f..8f1eaa1d40 100644 --- a/storage-node/src/services/sync/cleanupService.ts +++ b/storage-node/src/services/sync/cleanupService.ts @@ -91,41 +91,60 @@ export async function performCleanup( const deletedDataObjectIds = new Set([...obsoleteObjectIds].filter((id) => !movedObjectIds.has(id))) logger.info(`stored objects: ${storedObjectsIds.length}, assigned objects: ${assignedObjectIds.size}`) - logger.info( - `pruning ${obsoleteObjectIds.size} obsolete objects ` + - `(${movedObjectIds.size} moved, ${deletedDataObjectIds.size} deleted)` - ) - - const workingStack = new WorkingStack() - const processSpawner = new TaskProcessorSpawner(workingStack, asyncWorkersNumber) - - // Execute deleted objects removal tasks in batches of 10_000 - let deletedProcessed = 0 - logger.info(`removing ${deletedDataObjectIds.size} deleted objects...`) - for (const deletedObjectsIdsBatch of _.chunk([...deletedDataObjectIds], 10_000)) { - const deletionTasks = deletedObjectsIdsBatch.map((id) => new DeleteLocalFileTask(uploadDirectory, id)) - await workingStack.add(deletionTasks) - await processSpawner.process() - deletedProcessed += deletedObjectsIdsBatch.length - logger.debug(`${deletedProcessed} / ${deletedDataObjectIds.size} deleted objects processed...`) - } - - // Execute moved objects removal tasks in batches of 10_000 - let movedProcessed = 0 - logger.info(`removing ${movedObjectIds.size} moved objects...`) - for (const movedObjectsIdsBatch of _.chunk([...movedObjectIds], 10_000)) { - const movedDataObjectsBatch = await qnApi.getDataObjectsWithBagDetails(movedObjectsIdsBatch) - const deletionTasksOfMovedDataObjects = await getDeletionTasksFromMovedDataObjects( - logger, - uploadDirectory, - model, - movedDataObjectsBatch, - hostId + if (obsoleteObjectIds.size) { + logger.info( + `pruning ${obsoleteObjectIds.size} obsolete objects ` + + `(${movedObjectIds.size} moved, ${deletedDataObjectIds.size} deleted)` ) - await workingStack.add(deletionTasksOfMovedDataObjects) - await processSpawner.process() - movedProcessed += movedDataObjectsBatch.length - logger.debug(`${movedProcessed} / ${movedObjectIds.size} moved objects processed...`) + + const workingStack = new WorkingStack() + const processSpawner = new TaskProcessorSpawner(workingStack, asyncWorkersNumber) + + // Execute deleted objects removal tasks in batches of 10_000 + if (deletedDataObjectIds.size) { + let deletedProcessed = 0 + logger.info(`removing ${deletedDataObjectIds.size} deleted objects...`) + for (let deletedObjectsIdsBatch of _.chunk([...deletedDataObjectIds], 10_000)) { + // Confirm whether the objects were actually deleted by fetching the related deletion events + const dataObjectDeletedEvents = await qnApi.getDataObjectDeletedEvents(deletedObjectsIdsBatch) + const confirmedIds = new 
Set(dataObjectDeletedEvents.map((e) => e.data.dataObjectId)) + deletedObjectsIdsBatch = deletedObjectsIdsBatch.filter((id) => { + if (confirmedIds.has(id)) { + return true + } else { + logger.warn(`Could not find DataObjectDeleted event for object ${id}, skipping from cleanup...`) + return false + } + }) + const deletionTasks = deletedObjectsIdsBatch.map((id) => new DeleteLocalFileTask(uploadDirectory, id)) + await workingStack.add(deletionTasks) + await processSpawner.process() + deletedProcessed += deletedObjectsIdsBatch.length + logger.debug(`${deletedProcessed} / ${deletedDataObjectIds.size} deleted objects processed...`) + } + } + + // Execute moved objects removal tasks in batches of 10_000 + if (movedObjectIds.size) { + let movedProcessed = 0 + logger.info(`removing ${movedObjectIds.size} moved objects...`) + for (const movedObjectsIdsBatch of _.chunk([...movedObjectIds], 10_000)) { + const movedDataObjectsBatch = await qnApi.getDataObjectsWithBagDetails(movedObjectsIdsBatch) + const deletionTasksOfMovedDataObjects = await getDeletionTasksFromMovedDataObjects( + logger, + uploadDirectory, + model, + movedDataObjectsBatch, + hostId + ) + await workingStack.add(deletionTasksOfMovedDataObjects) + await processSpawner.process() + movedProcessed += movedDataObjectsBatch.length + logger.debug(`${movedProcessed} / ${movedObjectIds.size} moved objects processed...`) + } + } + } else { + logger.info('No objects to prune, skipping...') } logger.info('Cleanup ended.')
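Note on the pattern above: the pruning logic relies on `Set`-based membership checks (`assignedObjectIds.has(id)`, `movedObjectIds.has(id)`, `confirmedIds.has(id)`) rather than `Array.prototype.includes`, which is the optimization called out in the 4.3.0 changelog entry. The sketch below is editorial and not part of the patch; it only illustrates the lookup pattern in isolation, with invented object ids.

```ts
// Illustrative sketch of the Set-based lookup pattern used by the sync/cleanup services above.
// Not part of the patch; the ids below are made up for demonstration.
const storedObjectIds: string[] = ['1', '2', '3', '4', '5']
const assignedObjectIds = new Set<string>(['2', '4', '5', '6'])

// O(n * m): every `includes` call rescans the whole assigned collection.
// const obsolete = storedObjectIds.filter((id) => ![...assignedObjectIds].includes(id))

// O(n): each `has` lookup is a constant-time hash check.
const obsoleteObjectIds = storedObjectIds.filter((id) => !assignedObjectIds.has(id))
console.log(obsoleteObjectIds) // [ '1', '3' ]
```

Swapping the linear `includes` scan for a constant-time `has` lookup turns an O(n * m) pass into an O(n) one, which is what keeps the batched 10_000-item sync and cleanup passes cheap when a node tracks a large number of data objects.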