From 20aaa8ddcfde84bca8825ab2f4a966bb551165f2 Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Thu, 15 Feb 2024 08:45:18 -0800 Subject: [PATCH 1/2] Avoid string out of bounds error in snapshot delete Test failure #8771 shows cases where certain random seeds trigger this case. The bug is clear: the substring() call should happen after the startsWith() check in case the blob name is shorter than the prefix length being used as the start index of the substring call. I don't yet know if/how this manifests in real deployments. Signed-off-by: Andrew Ross --- .../repositories/blobstore/BlobStoreRepository.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java index 8a2260e1f6d90..f36bad543a359 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java @@ -1592,8 +1592,11 @@ private void executeOneStaleIndexDelete( Map shardLevelBlobs = shardBlob.getValue().listBlobs(); for (Map.Entry shardLevelBlob : shardLevelBlobs.entrySet()) { String blob = shardLevelBlob.getKey(); - String snapshotUUID = blob.substring(SHALLOW_SNAPSHOT_PREFIX.length(), blob.length() - ".dat".length()); if (blob.startsWith(SHALLOW_SNAPSHOT_PREFIX) && blob.endsWith(".dat")) { + String snapshotUUID = blob.substring( + SHALLOW_SNAPSHOT_PREFIX.length(), + blob.length() - ".dat".length() + ); RemoteStoreShardShallowCopySnapshot remoteStoreShardShallowCopySnapshot = REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read( shardBlob.getValue(), From 09778ad3e563fffeb6a0d76677fbbf93ec5762c1 Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Thu, 15 Feb 2024 11:14:55 -0800 Subject: [PATCH 2/2] Extract common UUID parsing method Signed-off-by: Andrew Ross --- .../blobstore/BlobStoreRepository.java | 95 +++++++++---------- 1 file changed, 45 insertions(+), 50 deletions(-) diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java index f36bad543a359..18f4ab70024f4 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java @@ -1116,10 +1116,7 @@ private void executeStaleShardDelete( String indexId = fileToDeletePath[1]; String shardId = fileToDeletePath[2]; String shallowSnapBlob = fileToDeletePath[3]; - String snapshotUUID = shallowSnapBlob.substring( - SHALLOW_SNAPSHOT_PREFIX.length(), - shallowSnapBlob.length() - ".dat".length() - ); + String snapshotUUID = extractShallowSnapshotUUID(shallowSnapBlob).orElseThrow(); BlobContainer shardContainer = blobStore().blobContainer(indicesPath().add(indexId).add(shardId)); RemoteStoreShardShallowCopySnapshot remoteStoreShardShallowCopySnapshot = REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read( @@ -1586,47 +1583,43 @@ private void executeOneStaleIndexDelete( try { logger.debug("[{}] Found stale index [{}]. Cleaning it up", metadata.name(), indexSnId); if (remoteStoreLockManagerFactory != null) { - Map shardBlobs = indexEntry.getValue().children(); - if (!shardBlobs.isEmpty()) { - for (Map.Entry shardBlob : shardBlobs.entrySet()) { - Map shardLevelBlobs = shardBlob.getValue().listBlobs(); - for (Map.Entry shardLevelBlob : shardLevelBlobs.entrySet()) { - String blob = shardLevelBlob.getKey(); - if (blob.startsWith(SHALLOW_SNAPSHOT_PREFIX) && blob.endsWith(".dat")) { - String snapshotUUID = blob.substring( - SHALLOW_SNAPSHOT_PREFIX.length(), - blob.length() - ".dat".length() - ); - RemoteStoreShardShallowCopySnapshot remoteStoreShardShallowCopySnapshot = - REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read( - shardBlob.getValue(), - snapshotUUID, - namedXContentRegistry - ); - String indexUUID = remoteStoreShardShallowCopySnapshot.getIndexUUID(); - String remoteStoreRepoForIndex = remoteStoreShardShallowCopySnapshot.getRemoteStoreRepository(); - // Releasing lock files before deleting the shallow-snap-UUID file because in case of any failure - // while releasing the lock file, we would still have the corresponding shallow-snap-UUID file - // and that would be used during next delete operation for releasing this stale lock file - RemoteStoreLockManager remoteStoreMetadataLockManager = remoteStoreLockManagerFactory - .newLockManager(remoteStoreRepoForIndex, indexUUID, shardBlob.getKey()); - remoteStoreMetadataLockManager.release( - FileLockInfo.getLockInfoBuilder().withAcquirerId(snapshotUUID).build() + final Map shardBlobs = indexEntry.getValue().children(); + for (Map.Entry shardBlob : shardBlobs.entrySet()) { + for (String blob : shardBlob.getValue().listBlobs().keySet()) { + final Optional snapshotUUID = extractShallowSnapshotUUID(blob); + if (snapshotUUID.isPresent()) { + RemoteStoreShardShallowCopySnapshot remoteStoreShardShallowCopySnapshot = + REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read( + shardBlob.getValue(), + snapshotUUID.get(), + namedXContentRegistry ); - if (!isIndexPresent(clusterService, indexUUID)) { - // this is a temporary solution where snapshot deletion triggers remote store side - // cleanup if index is already deleted. We will add a poller in future to take - // care of remote store side cleanup. - // see https://github.com/opensearch-project/OpenSearch/issues/8469 - new RemoteSegmentStoreDirectoryFactory( - remoteStoreLockManagerFactory.getRepositoriesService(), - threadPool - ).newDirectory( - remoteStoreRepoForIndex, - indexUUID, - new ShardId(Index.UNKNOWN_INDEX_NAME, indexUUID, Integer.valueOf(shardBlob.getKey())) - ).close(); - } + String indexUUID = remoteStoreShardShallowCopySnapshot.getIndexUUID(); + String remoteStoreRepoForIndex = remoteStoreShardShallowCopySnapshot.getRemoteStoreRepository(); + // Releasing lock files before deleting the shallow-snap-UUID file because in case of any failure + // while releasing the lock file, we would still have the corresponding shallow-snap-UUID file + // and that would be used during next delete operation for releasing this stale lock file + RemoteStoreLockManager remoteStoreMetadataLockManager = remoteStoreLockManagerFactory.newLockManager( + remoteStoreRepoForIndex, + indexUUID, + shardBlob.getKey() + ); + remoteStoreMetadataLockManager.release( + FileLockInfo.getLockInfoBuilder().withAcquirerId(snapshotUUID.get()).build() + ); + if (!isIndexPresent(clusterService, indexUUID)) { + // this is a temporary solution where snapshot deletion triggers remote store side + // cleanup if index is already deleted. We will add a poller in future to take + // care of remote store side cleanup. + // see https://github.com/opensearch-project/OpenSearch/issues/8469 + new RemoteSegmentStoreDirectoryFactory( + remoteStoreLockManagerFactory.getRepositoriesService(), + threadPool + ).newDirectory( + remoteStoreRepoForIndex, + indexUUID, + new ShardId(Index.UNKNOWN_INDEX_NAME, indexUUID, Integer.parseInt(shardBlob.getKey())) + ).close(); } } } @@ -3365,12 +3358,7 @@ private static List unusedBlobs( blob.substring(SNAPSHOT_PREFIX.length(), blob.length() - ".dat".length()) ) == false) || (remoteStoreLockManagerFactory != null - ? (blob.startsWith(SHALLOW_SNAPSHOT_PREFIX) - && blob.endsWith(".dat") - && survivingSnapshotUUIDs.contains( - blob.substring(SHALLOW_SNAPSHOT_PREFIX.length(), blob.length() - ".dat".length()) - ) == false) - : false) + && extractShallowSnapshotUUID(blob).map(survivingSnapshotUUIDs::contains).orElse(false)) || (blob.startsWith(UPLOADED_DATA_BLOB_PREFIX) && updatedSnapshots.findNameFile(canonicalName(blob)) == null) || FsBlobContainer.isTempBlobName(blob) ) @@ -3512,6 +3500,13 @@ private static void failStoreIfCorrupted(Store store, Exception e) { } } + private static Optional extractShallowSnapshotUUID(String blobName) { + if (blobName.startsWith(SHALLOW_SNAPSHOT_PREFIX)) { + return Optional.of(blobName.substring(SHALLOW_SNAPSHOT_PREFIX.length(), blobName.length() - ".dat".length())); + } + return Optional.empty(); + } + /** * The result of removing a snapshot from a shard folder in the repository. */