Skip to content

Commit

Permalink
Avoid string out of bounds error in snapshot delete (#12337)
Browse files Browse the repository at this point in the history
* Avoid string out of bounds error in snapshot delete

Test failure #8771 shows cases where certain random seeds trigger this
case. The bug is clear: the substring() call should happen after the
startsWith() check in case the blob name is shorter than the prefix
length being used as the start index of the substring call. I don't yet
know if/how this manifests in real deployments.

Signed-off-by: Andrew Ross <[email protected]>

* Extract common UUID parsing method

Signed-off-by: Andrew Ross <[email protected]>

---------

Signed-off-by: Andrew Ross <[email protected]>
(cherry picked from commit f86bfa8)
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
github-actions[bot] authored and andrross committed Feb 22, 2024
1 parent 44f1417 commit ce2dcd1
Showing 1 changed file with 45 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1179,10 +1179,7 @@ private void executeStaleShardDelete(
String indexId = fileToDeletePath[1];
String shardId = fileToDeletePath[2];
String shallowSnapBlob = fileToDeletePath[3];
String snapshotUUID = shallowSnapBlob.substring(
SHALLOW_SNAPSHOT_PREFIX.length(),
shallowSnapBlob.length() - ".dat".length()
);
String snapshotUUID = extractShallowSnapshotUUID(shallowSnapBlob).orElseThrow();

Check warning on line 1182 in server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java#L1182

Added line #L1182 was not covered by tests
BlobContainer shardContainer = blobStore().blobContainer(indicesPath().add(indexId).add(shardId));
RemoteStoreShardShallowCopySnapshot remoteStoreShardShallowCopySnapshot =
REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read(
Expand Down Expand Up @@ -1649,44 +1646,43 @@ private void executeOneStaleIndexDelete(
try {
logger.debug("[{}] Found stale index [{}]. Cleaning it up", metadata.name(), indexSnId);
if (remoteStoreLockManagerFactory != null) {
Map<String, BlobContainer> shardBlobs = indexEntry.getValue().children();
if (!shardBlobs.isEmpty()) {
for (Map.Entry<String, BlobContainer> shardBlob : shardBlobs.entrySet()) {
Map<String, BlobMetadata> shardLevelBlobs = shardBlob.getValue().listBlobs();
for (Map.Entry<String, BlobMetadata> shardLevelBlob : shardLevelBlobs.entrySet()) {
String blob = shardLevelBlob.getKey();
String snapshotUUID = blob.substring(SHALLOW_SNAPSHOT_PREFIX.length(), blob.length() - ".dat".length());
if (blob.startsWith(SHALLOW_SNAPSHOT_PREFIX) && blob.endsWith(".dat")) {
RemoteStoreShardShallowCopySnapshot remoteStoreShardShallowCopySnapshot =
REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read(
shardBlob.getValue(),
snapshotUUID,
namedXContentRegistry
);
String indexUUID = remoteStoreShardShallowCopySnapshot.getIndexUUID();
String remoteStoreRepoForIndex = remoteStoreShardShallowCopySnapshot.getRemoteStoreRepository();
// Releasing lock files before deleting the shallow-snap-UUID file because in case of any failure
// while releasing the lock file, we would still have the corresponding shallow-snap-UUID file
// and that would be used during next delete operation for releasing this stale lock file
RemoteStoreLockManager remoteStoreMetadataLockManager = remoteStoreLockManagerFactory
.newLockManager(remoteStoreRepoForIndex, indexUUID, shardBlob.getKey());
remoteStoreMetadataLockManager.release(
FileLockInfo.getLockInfoBuilder().withAcquirerId(snapshotUUID).build()
final Map<String, BlobContainer> shardBlobs = indexEntry.getValue().children();
for (Map.Entry<String, BlobContainer> shardBlob : shardBlobs.entrySet()) {
for (String blob : shardBlob.getValue().listBlobs().keySet()) {
final Optional<String> snapshotUUID = extractShallowSnapshotUUID(blob);

Check warning on line 1652 in server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java#L1652

Added line #L1652 was not covered by tests
if (snapshotUUID.isPresent()) {
RemoteStoreShardShallowCopySnapshot remoteStoreShardShallowCopySnapshot =
REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read(
shardBlob.getValue(),
snapshotUUID.get(),

Check warning on line 1657 in server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java#L1654-L1657

Added lines #L1654 - L1657 were not covered by tests
namedXContentRegistry
);
if (!isIndexPresent(clusterService, indexUUID)) {
// this is a temporary solution where snapshot deletion triggers remote store side
// cleanup if index is already deleted. We will add a poller in future to take
// care of remote store side cleanup.
// see https://github.com/opensearch-project/OpenSearch/issues/8469
new RemoteSegmentStoreDirectoryFactory(
remoteStoreLockManagerFactory.getRepositoriesService(),
threadPool
).newDirectory(
remoteStoreRepoForIndex,
indexUUID,
new ShardId(Index.UNKNOWN_INDEX_NAME, indexUUID, Integer.valueOf(shardBlob.getKey()))
).close();
}
String indexUUID = remoteStoreShardShallowCopySnapshot.getIndexUUID();
String remoteStoreRepoForIndex = remoteStoreShardShallowCopySnapshot.getRemoteStoreRepository();

Check warning on line 1661 in server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java#L1660-L1661

Added lines #L1660 - L1661 were not covered by tests
// Releasing lock files before deleting the shallow-snap-UUID file because in case of any failure
// while releasing the lock file, we would still have the corresponding shallow-snap-UUID file
// and that would be used during next delete operation for releasing this stale lock file
RemoteStoreLockManager remoteStoreMetadataLockManager = remoteStoreLockManagerFactory.newLockManager(

Check warning on line 1665 in server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java#L1665

Added line #L1665 was not covered by tests
remoteStoreRepoForIndex,
indexUUID,
shardBlob.getKey()

Check warning on line 1668 in server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java#L1668

Added line #L1668 was not covered by tests
);
remoteStoreMetadataLockManager.release(
FileLockInfo.getLockInfoBuilder().withAcquirerId(snapshotUUID.get()).build()

Check warning on line 1671 in server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java#L1670-L1671

Added lines #L1670 - L1671 were not covered by tests
);
if (!isIndexPresent(clusterService, indexUUID)) {
// this is a temporary solution where snapshot deletion triggers remote store side
// cleanup if index is already deleted. We will add a poller in future to take
// care of remote store side cleanup.
// see https://github.com/opensearch-project/OpenSearch/issues/8469
new RemoteSegmentStoreDirectoryFactory(
remoteStoreLockManagerFactory.getRepositoriesService(),

Check warning on line 1679 in server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java#L1678-L1679

Added lines #L1678 - L1679 were not covered by tests
threadPool
).newDirectory(

Check warning on line 1681 in server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java#L1681

Added line #L1681 was not covered by tests
remoteStoreRepoForIndex,
indexUUID,
new ShardId(Index.UNKNOWN_INDEX_NAME, indexUUID, Integer.parseInt(shardBlob.getKey()))
).close();

Check warning on line 1685 in server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java#L1684-L1685

Added lines #L1684 - L1685 were not covered by tests
}
}
}
Expand Down Expand Up @@ -3500,12 +3496,7 @@ private static List<String> unusedBlobs(
blob.substring(SNAPSHOT_PREFIX.length(), blob.length() - ".dat".length())
) == false)
|| (remoteStoreLockManagerFactory != null
? (blob.startsWith(SHALLOW_SNAPSHOT_PREFIX)
&& blob.endsWith(".dat")
&& survivingSnapshotUUIDs.contains(
blob.substring(SHALLOW_SNAPSHOT_PREFIX.length(), blob.length() - ".dat".length())
) == false)
: false)
&& extractShallowSnapshotUUID(blob).map(survivingSnapshotUUIDs::contains).orElse(false))
|| (blob.startsWith(UPLOADED_DATA_BLOB_PREFIX) && updatedSnapshots.findNameFile(canonicalName(blob)) == null)
|| FsBlobContainer.isTempBlobName(blob)
)
Expand Down Expand Up @@ -3649,6 +3640,13 @@ private static void failStoreIfCorrupted(Store store, Exception e) {
}
}

private static Optional<String> extractShallowSnapshotUUID(String blobName) {
if (blobName.startsWith(SHALLOW_SNAPSHOT_PREFIX)) {
return Optional.of(blobName.substring(SHALLOW_SNAPSHOT_PREFIX.length(), blobName.length() - ".dat".length()));

Check warning on line 3645 in server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java#L3645

Added line #L3645 was not covered by tests
}
return Optional.empty();

Check warning on line 3647 in server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java#L3647

Added line #L3647 was not covered by tests
}

/**
* The result of removing a snapshot from a shard folder in the repository.
*/
Expand Down

0 comments on commit ce2dcd1

Please sign in to comment.