diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java
index 88f5de8686011..03cc14a75ff0d 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java
@@ -397,6 +397,26 @@ public class AbfsConfiguration{
       FS_AZURE_ENABLE_PAGINATED_DELETE, DefaultValue = DEFAULT_ENABLE_PAGINATED_DELETE)
   private boolean isPaginatedDeleteEnabled;
 
+  @LongConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_BLOB_COPY_PROGRESS_WAIT_MILLIS,
+      DefaultValue = DEFAULT_AZURE_BLOB_COPY_PROGRESS_WAIT_MILLIS)
+  private long blobCopyProgressPollWaitMillis;
+
+  @LongConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_BLOB_ATOMIC_RENAME_LEASE_REFRESH_DURATION,
+      DefaultValue = DEFAULT_AZURE_BLOB_ATOMIC_RENAME_LEASE_REFRESH_DURATION)
+  private long blobAtomicRenameLeaseRefreshDuration;
+
+  @IntegerConfigurationValidatorAnnotation(ConfigurationKey =
+      FS_AZURE_PRODUCER_QUEUE_MAX_SIZE, DefaultValue = DEFAULT_FS_AZURE_PRODUCER_QUEUE_MAX_SIZE)
+  private int producerQueueMaxSize;
+
+  @IntegerConfigurationValidatorAnnotation(ConfigurationKey =
+      FS_AZURE_BLOB_DIR_RENAME_MAX_THREAD, DefaultValue = DEFAULT_FS_AZURE_BLOB_RENAME_THREAD)
+  private int blobRenameDirConsumptionParallelism;
+
+  @IntegerConfigurationValidatorAnnotation(ConfigurationKey =
+      FS_AZURE_BLOB_DIR_DELETE_MAX_THREAD, DefaultValue = DEFAULT_FS_AZURE_BLOB_DELETE_THREAD)
+  private int blobDeleteDirConsumptionParallelism;
+
   private String clientProvidedEncryptionKey;
   private String clientProvidedEncryptionKeySHA;
 
@@ -1413,4 +1433,24 @@ public boolean getIsChecksumValidationEnabled() {
   public void setIsChecksumValidationEnabled(boolean isChecksumValidationEnabled) {
     this.isChecksumValidationEnabled = isChecksumValidationEnabled;
   }
+
+  public long getBlobCopyProgressPollWaitMillis() {
+    return blobCopyProgressPollWaitMillis;
+  }
+
+  public long getAtomicRenameLeaseRefreshDuration() {
+    return blobAtomicRenameLeaseRefreshDuration;
+  }
+
+  public int getProducerQueueMaxSize() {
+    return producerQueueMaxSize;
+  }
+
+  public int getBlobRenameDirConsumptionParallelism() {
+    return blobRenameDirConsumptionParallelism;
+  }
+
+  public int getBlobDeleteDirConsumptionParallelism() {
+    return blobDeleteDirConsumptionParallelism;
+  }
 }
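The five new settings above are ordinary Hadoop configuration keys (the key strings are added to ConfigurationKeys.java further down), so they can be tuned per cluster or per job. A minimal sketch of overriding the defaults; the account URI is a placeholder:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class AbfsBlobTuningSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Poll the async Copy Blob API every 500 ms instead of the default 1 s.
    conf.setLong("fs.azure.blob.copy.progress.wait.millis", 500L);
    // Refresh atomic-rename leases every 30 s instead of the default 60 s.
    conf.setLong("fs.azure.blob.atomic.rename.lease.refresh.duration", 30_000L);
    // Use 8 consumer threads per blob-rename orchestration (default 5).
    conf.setInt("fs.azure.blob.dir.rename.max.thread", 8);
    FileSystem fs = FileSystem.get(
        new URI("abfs://container@myaccount.dfs.core.windows.net/"), conf);
    System.out.println("Using " + fs.getUri());
  }
}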
", ex); @@ -523,6 +525,8 @@ public boolean rename(final Path src, final Path dst) throws IOException { src, ex, AzureServiceErrorCode.PATH_ALREADY_EXISTS, + AzureServiceErrorCode.BLOB_ALREADY_EXISTS, + AzureServiceErrorCode.BLOB_PATH_NOT_FOUND, AzureServiceErrorCode.INVALID_RENAME_SOURCE_PATH, AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND, AzureServiceErrorCode.INVALID_SOURCE_OR_DESTINATION_RESOURCE_TYPE, @@ -595,8 +599,9 @@ public Pair commitSingleFileByRename( final Duration waitTime = rateLimiting.acquire(1); try { - final boolean recovered = abfsStore.rename(qualifiedSrcPath, - qualifiedDstPath, tracingContext, sourceEtag); + final boolean recovered = getAbfsStore().rename(qualifiedSrcPath, + qualifiedDstPath, tracingContext, sourceEtag + ); return Pair.of(recovered, waitTime); } catch (AzureBlobFileSystemException ex) { LOG.debug("Rename operation failed. ", ex); @@ -627,10 +632,11 @@ public boolean delete(final Path f, final boolean recursive) throws IOException TracingContext tracingContext = new TracingContext(clientCorrelationId, fileSystemId, FSOperationType.DELETE, tracingHeaderFormat, listener); - abfsStore.delete(qualifiedPath, recursive, tracingContext); + getAbfsStore().delete(qualifiedPath, recursive, tracingContext); return true; } catch (AzureBlobFileSystemException ex) { - checkException(f, ex, AzureServiceErrorCode.PATH_NOT_FOUND); + checkException(f, ex, AzureServiceErrorCode.PATH_NOT_FOUND, + AzureServiceErrorCode.BLOB_PATH_NOT_FOUND); return false; } @@ -647,7 +653,8 @@ public FileStatus[] listStatus(final Path f) throws IOException { TracingContext tracingContext = new TracingContext(clientCorrelationId, fileSystemId, FSOperationType.LISTSTATUS, true, tracingHeaderFormat, listener); - FileStatus[] result = abfsStore.listStatus(qualifiedPath, tracingContext); + FileStatus[] result = getAbfsStore().listStatus(qualifiedPath, + tracingContext); return result; } catch (AzureBlobFileSystemException ex) { checkException(f, ex); @@ -777,7 +784,9 @@ private FileStatus getFileStatus(final Path path, Path qualifiedPath = makeQualified(path); try { - return abfsStore.getFileStatus(qualifiedPath, tracingContext); + FileStatus fileStatus = getAbfsStore().getFileStatus(qualifiedPath, + tracingContext); + return fileStatus; } catch (AzureBlobFileSystemException ex) { checkException(path, ex); return null; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index 6f577f63d3d2a..fe9afc5f6fb7c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -152,6 +152,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_FOOTER_READ_BUFFER_SIZE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_BUFFERED_PREAD_DISABLE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_IDENTITY_TRANSFORM_CLASS; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.INFINITE_LEASE_DURATION; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_BLOB_DOMAIN_NAME; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_ENCRYPTION_CONTEXT; @@ -175,7 +176,6 @@ public class AzureBlobFileSystemStore implements Closeable, 
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
index 6f577f63d3d2a..fe9afc5f6fb7c 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
@@ -152,6 +152,7 @@
 import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_FOOTER_READ_BUFFER_SIZE;
 import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_BUFFERED_PREAD_DISABLE;
 import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_IDENTITY_TRANSFORM_CLASS;
+import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.INFINITE_LEASE_DURATION;
 import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_BLOB_DOMAIN_NAME;
 import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_ENCRYPTION_CONTEXT;
 
@@ -175,7 +176,6 @@ public class AzureBlobFileSystemStore implements Closeable, ListingSupport {
   private final Map<AbfsLease, Object> leaseRefs;
 
   private final AbfsConfiguration abfsConfiguration;
-  private final Set<String> azureAtomicRenameDirSet;
   private Set<String> azureInfiniteLeaseDirSet;
   private volatile Trilean isNamespaceEnabled;
   private final AuthType authType;
@@ -243,8 +243,6 @@ public AzureBlobFileSystemStore(
     }
     LOG.trace("primaryUserGroup is {}", this.primaryUserGroup);
 
-    this.azureAtomicRenameDirSet = new HashSet<>(Arrays.asList(
-        abfsConfiguration.getAzureAtomicRenameDirs().split(AbfsHttpConstants.COMMA)));
     updateInfiniteLeaseDirs();
     this.authType = abfsConfiguration.getAuthType(accountName);
     boolean usingOauth = (authType == AuthType.OAuth);
@@ -719,8 +717,7 @@ private AbfsRestOperation conditionalCreateOverwriteFile(final String relativePa
       }
     }
 
-    String eTag = op.getResult()
-        .getResponseHeader(HttpHeaderConfigurations.ETAG);
+    String eTag = extractEtagHeader(op.getResult());
 
     try {
       // overwrite only if eTag matches with the file properties fetched before
@@ -803,6 +800,16 @@ private AbfsOutputStreamContext populateAbfsOutputStreamContext(
         .build();
   }
 
+  /**
+   * Creates a directory.
+   *
+   * @param path Path of the directory to create.
+   * @param permission Permission of the directory.
+   * @param umask Umask of the directory.
+   * @param tracingContext tracing context
+   *
+   * @throws IOException server error.
+   */
   public void createDirectory(final Path path, final FsPermission permission,
       final FsPermission umask, TracingContext tracingContext)
       throws IOException {
@@ -815,7 +822,6 @@ public void createDirectory(final Path path, final FsPermission permission,
         permission,
         umask,
         isNamespaceEnabled);
-
     boolean overwrite =
         !isNamespaceEnabled || abfsConfiguration.isEnabledMkdirOverwrite();
     Permissions permissions = new Permissions(isNamespaceEnabled,
@@ -1042,16 +1048,11 @@ public boolean rename(final Path source,
       final Path destination,
       final TracingContext tracingContext,
       final String sourceEtag) throws
-      IOException {
+          IOException {
     final Instant startAggregate = abfsPerfTracker.getLatencyInstant();
     long countAggregate = 0;
     boolean shouldContinue;
 
-    if (isAtomicRenameKey(source.getName())) {
-      LOG.warn("The atomic rename feature is not supported by the ABFS scheme; however rename,"
-          + " create and delete operations are atomic if Namespace is enabled for your Azure Storage account.");
-    }
-
     LOG.debug("renameAsync filesystem: {} source: {} destination: {}",
         getClient().getFileSystem(),
         source,
@@ -1070,11 +1071,21 @@ public boolean rename(final Path source,
         AbfsClientRenameResult abfsClientRenameResult =
             getClient().renamePath(sourceRelativePath, destinationRelativePath,
                 continuation, tracingContext, sourceEtag, false,
-                isNamespaceEnabled);
+                isNamespaceEnabled);
+
         AbfsRestOperation op = abfsClientRenameResult.getOp();
-        perfInfo.registerResult(op.getResult());
-        continuation = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_CONTINUATION);
+        /*
+         * The Blob endpoint does not have a rename API. AbfsBlobClient performs
+         * a copy followed by a delete to rename a path. Since that is not a
+         * single operation, the client does not return an AbfsRestOperation
+         * object.
+         */
+        if (op != null) {
+          perfInfo.registerResult(op.getResult());
+          continuation = op.getResult()
+              .getResponseHeader(HttpHeaderConfigurations.X_MS_CONTINUATION);
+        }
         perfInfo.registerSuccess(true);
         countAggregate++;
         shouldContinue = continuation != null && !continuation.isEmpty();
@@ -1090,7 +1101,7 @@ public boolean rename(final Path source,
   }
 
   public void delete(final Path path, final boolean recursive,
-      TracingContext tracingContext) throws AzureBlobFileSystemException {
+      TracingContext tracingContext) throws AzureBlobFileSystemException {
     final Instant startAggregate = abfsPerfTracker.getLatencyInstant();
     long countAggregate = 0;
     boolean shouldContinue = true;
@@ -1108,8 +1119,16 @@ public void delete(final Path path, final boolean recursive,
       try (AbfsPerfInfo perfInfo = startTracking("delete", "deletePath")) {
         AbfsRestOperation op = getClient().deletePath(relativePath, recursive,
             continuation, tracingContext, getIsNamespaceEnabled(tracingContext));
-        perfInfo.registerResult(op.getResult());
-        continuation = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_CONTINUATION);
+        /*
+         * The Blob endpoint does not have a directory delete API. AbfsBlobClient
+         * performs multiple operations to delete a path, so the client does not
+         * return an AbfsRestOperation object.
+         */
+        if (op != null) {
+          perfInfo.registerResult(op.getResult());
+          continuation = op.getResult()
+              .getResponseHeader(HttpHeaderConfigurations.X_MS_CONTINUATION);
+        }
         perfInfo.registerSuccess(true);
         countAggregate++;
         shouldContinue = continuation != null && !continuation.isEmpty();
@@ -1176,6 +1195,8 @@ public FileStatus getFileStatus(final Path path,
 
       perfInfo.registerSuccess(true);
 
+      getClient().takeGetPathStatusAtomicRenameKeyAction(path, tracingContext);
+
       return new VersionedFileStatus(
           transformedOwner,
           transformedGroup,
@@ -1196,6 +1217,7 @@ public FileStatus getFileStatus(final Path path,
   /**
    * @param path The list path.
    * @param tracingContext Tracks identifiers for request header
+   *
    * @return the entries in the path.
    * */
   @Override
@@ -1212,6 +1234,7 @@ public FileStatus[] listStatus(final Path path, TracingContext tracingContext) t
    *             all entries after this non-existent entry in lexical order:
    *             listStatus(Path("/folder"), "cfile") will return "/folder/hfile" and "/folder/ifile".
    * @param tracingContext Tracks identifiers for request header
+   *
    * @return the entries in the path start from "startFrom" in lexical order.
    * */
   @InterfaceStability.Unstable
@@ -1290,20 +1313,26 @@ public String listStatus(final Path path, final String startFrom,
         Path entryPath = new Path(File.separator + entry.name());
         entryPath = entryPath.makeQualified(this.uri, entryPath);
 
-        fileStatuses.add(
-            new VersionedFileStatus(
-                owner,
-                group,
-                fsPermission,
-                hasAcl,
-                contentLength,
-                isDirectory,
-                1,
-                blockSize,
-                lastModifiedMillis,
-                entryPath,
-                entry.eTag(),
-                encryptionContext));
+        final boolean actionTakenOnRenamePendingJson
+            = getClient().takeListPathAtomicRenameKeyAction(entryPath,
+            (int) contentLength,
+            tracingContext);
+        if (!actionTakenOnRenamePendingJson) {
+          fileStatuses.add(
+              new VersionedFileStatus(
+                  owner,
+                  group,
+                  fsPermission,
+                  hasAcl,
+                  contentLength,
+                  isDirectory,
+                  1,
+                  blockSize,
+                  lastModifiedMillis,
+                  entryPath,
+                  entry.eTag(),
+                  encryptionContext));
+        }
       }
 
       perfInfo.registerSuccess(true);
@@ -1706,10 +1735,6 @@ public void access(final Path path, final FsAction mode,
     }
   }
 
-  public boolean isAtomicRenameKey(String key) {
-    return isKeyForDirectorySet(key, azureAtomicRenameDirSet);
-  }
-
   public boolean isInfiniteLeaseKey(String key) {
     if (azureInfiniteLeaseDirSet.isEmpty()) {
       return false;
@@ -1865,7 +1890,7 @@ private boolean parseIsDirectory(final String resourceType) {
         && resourceType.equalsIgnoreCase(AbfsHttpConstants.DIRECTORY);
   }
 
-  private boolean isKeyForDirectorySet(String key, Set<String> dirSet) {
+  public static boolean isKeyForDirectorySet(String key, Set<String> dirSet) {
     for (String dir : dirSet) {
       if (dir.isEmpty() || key.startsWith(dir + AbfsHttpConstants.FORWARD_SLASH)) {
         return true;
@@ -2142,7 +2167,8 @@ private AbfsLease maybeCreateLease(String relativePath, TracingContext tracingCo
     if (!enableInfiniteLease) {
       return null;
     }
-    AbfsLease lease = new AbfsLease(getClient(), relativePath, tracingContext);
+    AbfsLease lease = new AbfsLease(getClient(), relativePath, true,
+        INFINITE_LEASE_DURATION, null, tracingContext);
     leaseRefs.put(lease, null);
     return lease;
   }
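isKeyForDirectorySet() becomes public static here because AbfsBlobClient (below) now owns the atomic-rename directory set and reuses the same prefix matching. Its rule, as visible in the implementation above (an empty dir entry matches everything; otherwise the key must start with the directory plus a path separator):

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import static org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.isKeyForDirectorySet;

Set<String> atomicDirs = new HashSet<>(Arrays.asList("/hbase"));
isKeyForDirectorySet("/hbase/table1", atomicDirs); // true: starts with "/hbase/"
isKeyForDirectorySet("/hbasebackup", atomicDirs);  // false: no separator after the prefix
isKeyForDirectorySet("/hbase", atomicDirs);        // false: the directory itself is not matched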
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java
index af7193305e7ee..02ee0fff54f9e 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java
@@ -184,6 +184,10 @@ public static ApiVersion getCurrentVersion() {
    */
   public static final Integer HTTP_STATUS_CATEGORY_QUOTIENT = 100;
 
+  public static final String COPY_STATUS_SUCCESS = "success";
+  public static final String COPY_STATUS_PENDING = "pending";
+  public static final String COPY_STATUS_ABORTED = "aborted";
+  public static final String COPY_STATUS_FAILED = "failed";
   public static final String XML_TAG_NAME = "Name";
   public static final String XML_TAG_BLOB = "Blob";
   public static final String XML_TAG_PREFIX = "Prefix";
@@ -249,5 +253,8 @@ public static ApiVersion getCurrentVersion() {
       + "non-hierarchical-namespace account:" + CPK_CONFIG_LIST;
 
+  public static final String ATOMIC_DIR_RENAME_RECOVERY_ON_GET_PATH_EXCEPTION =
+      "Path had to be recovered from atomic rename operation.";
+
   private AbfsHttpConstants() {}
 }
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
index d6517baafca80..3268f2c4b168e 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
@@ -346,5 +346,20 @@ public static String accountProperty(String property, String account) {
    * @see FileSystem#openFile(org.apache.hadoop.fs.Path)
    */
   public static final String FS_AZURE_BUFFERED_PREAD_DISABLE = "fs.azure.buffered.pread.disable";
+
+  /**
+   * The Blob copy API is asynchronous; this configuration defines the polling
+   * duration for checking the copy status: {@value}.
+   */
+  public static final String FS_AZURE_BLOB_COPY_PROGRESS_WAIT_MILLIS = "fs.azure.blob.copy.progress.wait.millis";
+  /** Blob rename lease refresh duration: {@value}. */
+  public static final String FS_AZURE_BLOB_ATOMIC_RENAME_LEASE_REFRESH_DURATION
+      = "fs.azure.blob.atomic.rename.lease.refresh.duration";
+  /** Maximum number of blob entries enqueued in memory for rename or delete orchestration: {@value}. */
+  public static final String FS_AZURE_PRODUCER_QUEUE_MAX_SIZE = "fs.azure.blob.dir.list.producer.queue.max.size";
+  /** Maximum number of threads per blob-rename orchestration: {@value}. */
+  public static final String FS_AZURE_BLOB_DIR_RENAME_MAX_THREAD = "fs.azure.blob.dir.rename.max.thread";
+  /** Maximum number of threads per blob-delete orchestration: {@value}. */
+  public static final String FS_AZURE_BLOB_DIR_DELETE_MAX_THREAD = "fs.azure.blob.dir.delete.max.thread";
   private ConfigurationKeys() {}
 }
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
index 4f498146ba895..c67331f4785fb 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
@@ -167,8 +167,13 @@ public final class FileSystemConfigurations {
   public static final int ZERO = 0;
   public static final int HUNDRED = 100;
   public static final long THOUSAND = 1000L;
+  public static final long DEFAULT_AZURE_BLOB_COPY_PROGRESS_WAIT_MILLIS = 1_000L;
+  public static final long
+      DEFAULT_AZURE_BLOB_ATOMIC_RENAME_LEASE_REFRESH_DURATION = 60_000L;
+  public static final int DEFAULT_FS_AZURE_PRODUCER_QUEUE_MAX_SIZE = 10000;
+  public static final int DEFAULT_FS_AZURE_BLOB_RENAME_THREAD = 5;
+  public static final int DEFAULT_FS_AZURE_BLOB_DELETE_THREAD = 5;
   public static final int BLOCK_ID_LENGTH = 60;
-
   private FileSystemConfigurations() {}
 }
"x-ms-copy-id"; + public static final String X_MS_COPY_STATUS_DESCRIPTION = "x-ms-copy-status-description"; + public static final String X_MS_COPY_STATUS = "x-ms-copy-status"; public static final String X_MS_METADATA_PREFIX = "x-ms-meta-"; public static final String X_MS_COPY_SOURCE = "x-ms-copy-source"; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java index 62ab1744aa060..b671f7cf34f2c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java @@ -34,6 +34,7 @@ public enum AzureServiceErrorCode { FILE_SYSTEM_ALREADY_EXISTS("FilesystemAlreadyExists", HttpURLConnection.HTTP_CONFLICT, null), PATH_ALREADY_EXISTS("PathAlreadyExists", HttpURLConnection.HTTP_CONFLICT, null), + BLOB_ALREADY_EXISTS("BlobAlreadyExists", HttpURLConnection.HTTP_CONFLICT, null), INTERNAL_OPERATION_ABORT("InternalOperationAbortError", HttpURLConnection.HTTP_CONFLICT, null), PATH_CONFLICT("PathConflict", HttpURLConnection.HTTP_CONFLICT, null), FILE_SYSTEM_NOT_FOUND("FilesystemNotFound", HttpURLConnection.HTTP_NOT_FOUND, null), @@ -44,6 +45,7 @@ public enum AzureServiceErrorCode { INVALID_SOURCE_OR_DESTINATION_RESOURCE_TYPE("InvalidSourceOrDestinationResourceType", HttpURLConnection.HTTP_CONFLICT, null), RENAME_DESTINATION_PARENT_PATH_NOT_FOUND("RenameDestinationParentPathNotFound", HttpURLConnection.HTTP_NOT_FOUND, null), INVALID_RENAME_SOURCE_PATH("InvalidRenameSourcePath", HttpURLConnection.HTTP_CONFLICT, null), + DIRECTORY_NOT_EMPTY_DELETE("DirectoryNotEmpty", HttpURLConnection.HTTP_CONFLICT, "The recursive query parameter value must be true to delete a non-empty directory"), INGRESS_OVER_ACCOUNT_LIMIT("ServerBusy", HttpURLConnection.HTTP_UNAVAILABLE, "Ingress is over the account limit."), EGRESS_OVER_ACCOUNT_LIMIT("ServerBusy", HttpURLConnection.HTTP_UNAVAILABLE, @@ -53,10 +55,13 @@ public enum AzureServiceErrorCode { OTHER_SERVER_THROTTLING("ServerBusy", HttpURLConnection.HTTP_UNAVAILABLE, "The server is currently unable to receive requests. 
Please retry your request."), INVALID_QUERY_PARAMETER_VALUE("InvalidQueryParameterValue", HttpURLConnection.HTTP_BAD_REQUEST, null), + INVALID_RENAME_DESTINATION("InvalidRenameDestinationPath", HttpURLConnection.HTTP_BAD_REQUEST, null), AUTHORIZATION_PERMISSION_MISS_MATCH("AuthorizationPermissionMismatch", HttpURLConnection.HTTP_FORBIDDEN, null), ACCOUNT_REQUIRES_HTTPS("AccountRequiresHttps", HttpURLConnection.HTTP_BAD_REQUEST, null), MD5_MISMATCH("Md5Mismatch", HttpURLConnection.HTTP_BAD_REQUEST, "The MD5 value specified in the request did not match with the MD5 value calculated by the server."), + COPY_BLOB_FAILED("COPY_BLOB_FAILED", HttpURLConnection.HTTP_INTERNAL_ERROR, null), + COPY_BLOB_ABORTED("COPY_BLOB_ABORTED", HttpURLConnection.HTTP_INTERNAL_ERROR, null), BLOB_OPERATION_NOT_SUPPORTED("BlobOperationNotSupported", HttpURLConnection.HTTP_CONFLICT, null), INVALID_APPEND_OPERATION("InvalidAppendOperation", HttpURLConnection.HTTP_CONFLICT, null), UNKNOWN(null, -1, null); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/enums/BlobCopyProgress.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/enums/BlobCopyProgress.java new file mode 100644 index 0000000000000..1da1fa017c5fc --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/enums/BlobCopyProgress.java @@ -0,0 +1,26 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/enums/BlobCopyProgress.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/enums/BlobCopyProgress.java
new file mode 100644
index 0000000000000..1da1fa017c5fc
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/enums/BlobCopyProgress.java
@@ -0,0 +1,26 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.enums;
+
+/**
+ * Progress states of a server-side blob copy operation.
+ */
+public enum BlobCopyProgress {
+  SUCCESS,
+  FAILURE,
+  ABORTED,
+  PENDING;
+}
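A plausible consumer of this enum folds the x-ms-copy-status string (the constants added to AbfsHttpConstants above) into a typed result. This mapping is a sketch, not necessarily the rename handler's actual code:

import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COPY_STATUS_ABORTED;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COPY_STATUS_FAILED;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COPY_STATUS_SUCCESS;

static BlobCopyProgress parseCopyStatus(final String copyStatus) {
  if (COPY_STATUS_SUCCESS.equalsIgnoreCase(copyStatus)) {
    return BlobCopyProgress.SUCCESS;
  }
  if (COPY_STATUS_ABORTED.equalsIgnoreCase(copyStatus)) {
    return BlobCopyProgress.ABORTED;
  }
  if (COPY_STATUS_FAILED.equalsIgnoreCase(copyStatus)) {
    return BlobCopyProgress.FAILURE;
  }
  return BlobCopyProgress.PENDING; // "pending", or any unrecognized status
}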
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java
index 0af3130143119..ec8fb2a59af08 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java
@@ -50,6 +50,9 @@ public interface SASTokenProvider {
   String SET_PERMISSION_OPERATION = "set-permission";
   String SET_PROPERTIES_OPERATION = "set-properties";
   String WRITE_OPERATION = "write";
+  String COPY_BLOB_DESTINATION = "copy-blob-dst";
+  String COPY_BLOB_SOURCE = "copy-blob-src";
+  String GET_BLOCK_LIST = "get-block-list";
   String APPEND_BLOCK_OPERATION = "append-block";
 
   /**
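Custom SASTokenProvider implementations will start receiving these operation names when the Blob client authorizes copy and block-list requests. A sketch of how a provider might branch on them; the issue* helper methods are hypothetical:

@Override
public String getSASToken(String account, String fileSystem, String path,
    String operation) throws IOException {
  switch (operation) {
    case SASTokenProvider.COPY_BLOB_SOURCE:      // read access to the copy source
    case SASTokenProvider.GET_BLOCK_LIST:        // read access to block metadata
      return issueReadSas(path);                 // hypothetical helper
    case SASTokenProvider.COPY_BLOB_DESTINATION: // write access to the copy destination
      return issueWriteSas(path);                // hypothetical helper
    default:
      return issueDefaultSas(path, operation);   // hypothetical helper
  }
}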
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java
index 8e0dbe69624b8..0dda37fa781d4 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java
@@ -34,20 +34,23 @@
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
 import java.util.Hashtable;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 import java.util.UUID;
 
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.classification.VisibleForTesting;
+
 import org.w3c.dom.Document;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;
 
 import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.NotImplementedException;
-import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
 import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore;
@@ -63,7 +66,6 @@
 import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultEntrySchema;
 import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultSchema;
 import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListXmlParser;
-import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema;
 import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema;
 import org.apache.hadoop.fs.azurebfs.contracts.services.StorageErrorResponseSchema;
 import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider;
@@ -79,6 +81,7 @@
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPLICATION_JSON;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPLICATION_OCTET_STREAM;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPLICATION_XML;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ATOMIC_DIR_RENAME_RECOVERY_ON_GET_PATH_EXCEPTION;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BLOCK;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BLOCKLIST;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BLOCK_BLOB_TYPE;
@@ -89,7 +92,6 @@
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DEFAULT_LEASE_BREAK_PERIOD;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH;
-import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.GET_ACCESS_CONTROL;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_DELETE;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_GET;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_HEAD;
@@ -143,17 +145,20 @@
 import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_MAX_RESULTS;
 import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_PREFIX;
 import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_RESTYPE;
+import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.PATH_EXISTS;
 
 import static java.net.HttpURLConnection.HTTP_CONFLICT;
-import static java.net.HttpURLConnection.HTTP_NOT_FOUND;
+import static org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.isKeyForDirectorySet;
+
 import static java.net.HttpURLConnection.HTTP_OK;
-import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.PATH_EXISTS;
 
 /**
  * AbfsClient interacting with Blob endpoint.
  */
 public class AbfsBlobClient extends AbfsClient implements Closeable {
 
+  private final HashSet<String> azureAtomicRenameDirSet;
+
   public AbfsBlobClient(final URL baseUrl,
       final SharedKeyCredentials sharedKeyCredentials,
       final AbfsConfiguration abfsConfiguration,
@@ -162,6 +167,8 @@ public AbfsBlobClient(final URL baseUrl,
       final AbfsClientContext abfsClientContext) throws IOException {
     super(baseUrl, sharedKeyCredentials, abfsConfiguration, tokenProvider,
         encryptionContextProvider, abfsClientContext);
+    this.azureAtomicRenameDirSet = new HashSet<>(Arrays.asList(
+        abfsConfiguration.getAzureAtomicRenameDirs().split(AbfsHttpConstants.COMMA)));
   }
 
   public AbfsBlobClient(final URL baseUrl,
@@ -172,6 +179,8 @@ public AbfsBlobClient(final URL baseUrl,
       final AbfsClientContext abfsClientContext) throws IOException {
     super(baseUrl, sharedKeyCredentials, abfsConfiguration, sasTokenProvider,
         encryptionContextProvider, abfsClientContext);
+    this.azureAtomicRenameDirSet = new HashSet<>(Arrays.asList(
+        abfsConfiguration.getAzureAtomicRenameDirs().split(AbfsHttpConstants.COMMA)));
   }
 
   @Override
@@ -555,7 +564,7 @@ public AbfsRestOperation appendBlock(final String path,
   @Override
   public AbfsRestOperation listPath(final String relativePath, final boolean recursive,
       final int listMaxResults, final String continuation, TracingContext tracingContext)
-      throws AzureBlobFileSystemException {
+      throws IOException {
     return listPath(relativePath, recursive, listMaxResults, continuation, tracingContext, true);
   }
 
@@ -623,11 +632,14 @@ private boolean isEmptyListResults(AbfsHttpOperation result) {
    */
   @Override
   public AbfsRestOperation acquireLease(final String path, final int duration,
-      TracingContext tracingContext) throws AzureBlobFileSystemException {
+      final String eTag, TracingContext tracingContext) throws AzureBlobFileSystemException {
     final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
     requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, ACQUIRE_LEASE_ACTION));
     requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_DURATION, Integer.toString(duration)));
    requestHeaders.add(new AbfsHttpHeader(X_MS_PROPOSED_LEASE_ID, UUID.randomUUID().toString()));
+    if (StringUtils.isNotEmpty(eTag)) {
+      requestHeaders.add(new AbfsHttpHeader(IF_MATCH, eTag));
+    }
 
     final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder();
     abfsUriQueryBuilder.addQuery(QUERY_PARAM_COMP, LEASE);
@@ -718,7 +730,15 @@ public AbfsRestOperation breakLease(final String path,
   }
 
   /**
-   * Get Rest Operation for API.
+   * Rename a file or directory.
+   * If a source etag is passed in, the operation will attempt to recover
+   * from a missing source file by probing the destination for
+   * existence and comparing etags.
+   * The second value in the result will be true to indicate that this
+   * took place.
+   * As rename recovery is only attempted if the source etag is non-empty,
+   * in normal rename operations rename recovery will never happen.
+   *
    * @param source path to source file
    * @param destination destination of rename.
    * @param continuation continuation.
@@ -727,20 +747,36 @@ public AbfsRestOperation breakLease(final String path,
    * @param sourceEtag etag of source file. may be null or empty
    * @param isMetadataIncompleteState was there a rename failure due to
    * incomplete metadata state?
    * @param isNamespaceEnabled whether namespace enabled account or not
-   * @return
-   * @throws IOException
+   *
+   * @return AbfsClientRenameResult result of rename operation indicating the
+   * AbfsRest operation, rename recovery and incomplete metadata state failure.
+   *
+   * @throws AzureBlobFileSystemException failure, excluding any recovery from overload failures.
    */
   @Override
   public AbfsClientRenameResult renamePath(final String source,
       final String destination,
       final String continuation,
       final TracingContext tracingContext,
+      String sourceEtag,
+      boolean isMetadataIncompleteState,
+      boolean isNamespaceEnabled)
+      throws IOException {
+    BlobRenameHandler blobRenameHandler = getBlobRenameHandler(source,
+        destination, sourceEtag, isAtomicRenameKey(source), tracingContext
+    );
+    incrementAbfsRenamePath();
+    return blobRenameHandler.execute();
+  }
+
+  @VisibleForTesting
+  BlobRenameHandler getBlobRenameHandler(final String source,
+      final String destination,
       final String sourceEtag,
-      final boolean isMetadataIncompleteState,
-      final boolean isNamespaceEnabled) throws IOException {
-    // Todo: To be implemented as part of rename-delete over blob endpoint work.
-    // This should redirect to rename handler to be implemented.
-    throw new NotImplementedException("Rename operation on Blob endpoint will be implemented in future.");
+      final boolean isAtomicRename,
+      final TracingContext tracingContext) {
+    return new BlobRenameHandler(source,
+        destination, this, sourceEtag, isAtomicRename, false, tracingContext);
   }
 
   /**
@@ -1082,11 +1118,18 @@ public AbfsRestOperation getPathStatus(final String path,
   public AbfsRestOperation deletePath(final String path,
       final boolean recursive,
       final String continuation,
-      TracingContext tracingContext,
+      final TracingContext tracingContext,
       final boolean isNamespaceEnabled) throws AzureBlobFileSystemException {
-    // Todo: To be implemented as part of rename-delete over blob endpoint work.
-    // This should redirect to delete handler to be implemented.
-    throw new NotImplementedException("Delete operation on Blob endpoint will be implemented in future.");
+    getBlobDeleteHandler(path, recursive, tracingContext).execute();
+    return null;
+  }
+
+  @VisibleForTesting
+  public BlobDeleteHandler getBlobDeleteHandler(final String path,
+      final boolean recursive,
+      final TracingContext tracingContext) {
+    return new BlobDeleteHandler(new Path(path), recursive, this,
+        tracingContext);
   }
 
   @Override
@@ -1230,7 +1273,7 @@ public AbfsRestOperation copyBlob(Path sourceBlobPath,
     final AbfsRestOperation op = getAbfsRestOperation(AbfsRestOperationType.CopyBlob,
         HTTP_METHOD_PUT, url, requestHeaders);
-
+    op.execute(tracingContext);
     return op;
   }
 
@@ -1386,6 +1429,106 @@ public String decodeAttribute(byte[] value) throws UnsupportedEncodingException
     return new String(value, XMS_PROPERTIES_ENCODING_UNICODE);
   }
 
+  public boolean isAtomicRenameKey(String key) {
+    return isKeyForDirectorySet(key, azureAtomicRenameDirSet);
+  }
+
+  @Override
+  public void takeGetPathStatusAtomicRenameKeyAction(final Path path,
+      final TracingContext tracingContext) throws IOException {
+    if (path == null || path.isRoot() || !isAtomicRenameKey(path.toUri().getPath())) {
+      return;
+    }
+    AbfsRestOperation pendingJsonFileStatus;
+    Path pendingJsonPath = new Path(path.getParent(),
+        path.toUri().getPath() + RenameAtomicity.SUFFIX);
+    try {
+      pendingJsonFileStatus = getPathStatus(
+          pendingJsonPath.toUri().getPath(), tracingContext, null, false);
+      if (checkIsDir(pendingJsonFileStatus.getResult())) {
+        return;
+      }
+    } catch (AbfsRestOperationException ex) {
+      if (ex.getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND) {
+        return;
+      }
+      throw ex;
+    }
+
+    boolean renameSrcHasChanged;
+    try {
+      RenameAtomicity renameAtomicity = getRedoRenameAtomicity(
+          pendingJsonPath, Integer.parseInt(pendingJsonFileStatus.getResult()
+              .getResponseHeader(HttpHeaderConfigurations.CONTENT_LENGTH)),
+          tracingContext);
+      renameAtomicity.redo();
+      renameSrcHasChanged = false;
+    } catch (AbfsRestOperationException ex) {
+      /*
+       * At this point, the source marked by the renamePending JSON file might
+       * already have been renamed by a parallel thread, or the path might have
+       * been modified, resulting in an eTag change that leads to an
+       * HTTP_CONFLICT. In either case, no further action needs to be taken, and
+       * the calling getPathStatus can return this source path as the result.
+       */
+      if (ex.getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND
+          || ex.getStatusCode() == HttpURLConnection.HTTP_CONFLICT) {
+        renameSrcHasChanged = true;
+      } else {
+        throw ex;
+      }
+    }
+    if (!renameSrcHasChanged) {
+      throw new AbfsRestOperationException(
+          AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(),
+          AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(),
+          ATOMIC_DIR_RENAME_RECOVERY_ON_GET_PATH_EXCEPTION,
+          null);
+    }
+  }
+
+  @Override
+  public boolean takeListPathAtomicRenameKeyAction(final Path path,
+      final int renamePendingJsonLen, final TracingContext tracingContext)
+      throws IOException {
+    if (path == null || path.isRoot() || !isAtomicRenameKey(
+        path.toUri().getPath()) || !path.toUri()
+        .getPath()
+        .endsWith(RenameAtomicity.SUFFIX)) {
+      return false;
+    }
+    try {
+      RenameAtomicity renameAtomicity
+          = getRedoRenameAtomicity(path, renamePendingJsonLen, tracingContext);
+      renameAtomicity.redo();
+    } catch (AbfsRestOperationException ex) {
+      /*
+       * At this point, the source marked by the renamePending JSON file might
+       * already have been renamed by a parallel thread, or the path might have
+       * been modified, resulting in an eTag change that leads to an
+       * HTTP_CONFLICT. In either case, no further action needs to be taken; but
+       * since this is a renamePending JSON file that would be deleted by the
+       * redo operation, the calling listPath should not return this JSON path
+       * in its results.
+       */
+      if (ex.getStatusCode() != HttpURLConnection.HTTP_NOT_FOUND
+          && ex.getStatusCode() != HttpURLConnection.HTTP_CONFLICT) {
+        throw ex;
+      }
+    }
+    return true;
+  }
+
+  @VisibleForTesting
+  RenameAtomicity getRedoRenameAtomicity(final Path path, int fileLen,
+      final TracingContext tracingContext) {
+    RenameAtomicity renameAtomicity = new RenameAtomicity(path,
+        fileLen,
+        tracingContext,
+        null,
+        this);
+    return renameAtomicity;
+  }
+
   /**
    * Checks if the value contains pure ASCII characters or not.
    * @param value
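takeGetPathStatusAtomicRenameKeyAction above probes for the sibling rename-pending marker of the queried path and, if present, replays or cleans it up via RenameAtomicity.redo(). RenameAtomicity itself is not part of this section; the SUFFIX value below is an assumption based on the WASB-style rename-pending convention:

Path src = new Path("/hbase/table1");
// Assumed: RenameAtomicity.SUFFIX == "-RenamePending.json"
Path pendingJsonPath = new Path(src.getParent(),
    src.toUri().getPath() + RenameAtomicity.SUFFIX);
// -> /hbase/table1-RenamePending.json; getPathStatus() on /hbase/table1 first
// checks this marker, and listPath() suppresses the marker from its results
// whenever takeListPathAtomicRenameKeyAction has redone or cleaned it up.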
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
index 38d9c68b639f2..4fdcc3cc210fb 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
@@ -43,8 +43,8 @@
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.hadoop.classification.VisibleForTesting;
+
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore;
 import org.apache.hadoop.fs.azurebfs.constants.FSOperationType;
 import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsInvalidChecksumException;
 import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsDriverException;
@@ -453,6 +453,7 @@ public abstract AbfsRestOperation createPath(final String path,
 
   public abstract AbfsRestOperation acquireLease(final String path,
       final int duration,
+      final String eTag,
       TracingContext tracingContext) throws AzureBlobFileSystemException;
 
   public abstract AbfsRestOperation renewLease(final String path,
@@ -1221,6 +1222,31 @@ public boolean isMetricCollectionEnabled() {
     return isMetricCollectionEnabled;
   }
 
+  /**
+   * Action to be taken when an atomic-rename key is present on a getPathStatus path.
+   *
+   * @param path path of the pending JSON file for the atomic path.
+   * @param tracingContext tracing context.
+   *
+   * @throws IOException server error, or the path is a renamePending JSON file
+   * and action has been taken.
+   */
+  public abstract void takeGetPathStatusAtomicRenameKeyAction(final Path path,
+      final TracingContext tracingContext) throws IOException;
+
+  /**
+   * Action to be taken when a pending JSON file is a child of an atomic-rename key listing.
+   *
+   * @param path path of the pending JSON file for the atomic path.
+   * @param renamePendingJsonLen length of the JSON file.
+   * @param tracingContext tracing context.
+   *
+   * @return true if the path is a renamePending JSON file and action has been taken.
+   *
+   * @throws IOException server error.
+   */
+  public abstract boolean takeListPathAtomicRenameKeyAction(final Path path,
+      final int renamePendingJsonLen, final TracingContext tracingContext) throws IOException;
+
   class TimerTaskImpl extends TimerTask {
     TimerTaskImpl() {
       runningTimerTask = this;
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java
index c0b0ca3cd22eb..5653ec109bf57 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java
@@ -64,9 +64,67 @@
 import static org.apache.commons.lang3.StringUtils.isEmpty;
 import static org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.extractEtagHeader;
-import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.*;
-import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.*;
-import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.*;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ACQUIRE_LEASE_ACTION;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_ACTION;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_BLOB_TYPE;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPLICATION_JSON;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPLICATION_OCTET_STREAM;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BREAK_LEASE_ACTION;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHECK_ACCESS;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COMMA;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DEFAULT_LEASE_BREAK_PERIOD;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DIRECTORY;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FILE;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FILESYSTEM;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FLUSH_ACTION;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.GET_ACCESS_CONTROL;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.GET_STATUS;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_DELETE;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_GET;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_HEAD;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PATCH;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_POST;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.RELEASE_LEASE_ACTION;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.RENEW_LEASE_ACTION;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SET_ACCESS_CONTROL;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SET_PROPERTIES_ACTION;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SINGLE_WHITE_SPACE;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.STAR;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TRUE;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.XMS_PROPERTIES_ENCODING_ASCII;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.ACCEPT;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.IF_MATCH;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.IF_NONE_MATCH;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.RANGE;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.USER_AGENT;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_HTTP_METHOD_OVERRIDE;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_EXISTING_RESOURCE_TYPE;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_LEASE_ACTION;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_LEASE_BREAK_PERIOD;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_LEASE_DURATION;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_LEASE_ID;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_PROPERTIES;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_PROPOSED_LEASE_ID;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_RANGE_GET_CONTENT_MD5;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_RENAME_SOURCE;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_FS_ACTION;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_ACTION;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_BLOBTYPE;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_CLOSE;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_CONTINUATION;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_DIRECTORY;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_FLUSH;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_MAXRESULTS;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_PAGINATED;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_POSITION;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_RECURSIVE;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_RESOURCE;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_RETAIN_UNCOMMITTED_DATA;
 import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND;
 import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND;
@@ -111,7 +169,7 @@ public List<AbfsHttpHeader> createDefaultHeaders() {
    * @return default request headers
    */
   @Override
-  public List<AbfsHttpHeader> createDefaultHeaders(ApiVersion xMsVersion) {
+  public List<AbfsHttpHeader> createDefaultHeaders(AbfsHttpConstants.ApiVersion xMsVersion) {
     List<AbfsHttpHeader> requestHeaders = super.createCommonHeaders(xMsVersion);
     requestHeaders.add(new AbfsHttpHeader(ACCEPT, APPLICATION_JSON
         + COMMA + SINGLE_WHITE_SPACE + APPLICATION_OCTET_STREAM));
@@ -354,7 +412,7 @@ public AbfsRestOperation createPath(final String path,
    */
   @Override
   public AbfsRestOperation acquireLease(final String path, final int duration,
-      TracingContext tracingContext) throws AzureBlobFileSystemException {
+      final String eTag, TracingContext tracingContext) throws AzureBlobFileSystemException {
     final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
     requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, ACQUIRE_LEASE_ACTION));
     requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_DURATION, Integer.toString(duration)));
@@ -449,6 +507,30 @@ public AbfsRestOperation breakLease(final String path, TracingContext tracingCon
     return op;
   }
 
+  /**
+   * Rename a file or directory.
+   * If a source etag is passed in, the operation will attempt to recover
+   * from a missing source file by probing the destination for
+   * existence and comparing etags.
+   * The second value in the result will be true to indicate that this
+   * took place.
+   * As rename recovery is only attempted if the source etag is non-empty,
+   * in normal rename operations rename recovery will never happen.
+   *
+   * @param source path to source file
+   * @param destination destination of rename.
+   * @param continuation continuation.
+   * @param tracingContext trace context
+   * @param sourceEtag etag of source file. may be null or empty
+   * @param isMetadataIncompleteState was there a rename failure due to
+   * incomplete metadata state?
+   * @param isNamespaceEnabled whether namespace enabled account or not
+   *
+   * @return AbfsClientRenameResult result of rename operation indicating the
+   * AbfsRest operation, rename recovery and incomplete metadata state failure.
+   *
+   * @throws AzureBlobFileSystemException failure, excluding any recovery from overload failures.
+   */
   @Override
   public AbfsClientRenameResult renamePath(
       final String source,
@@ -924,8 +1006,8 @@ public AbfsRestOperation deletePath(final String path, final boolean recursive,
      */
     final List<AbfsHttpHeader> requestHeaders = (isPaginatedDelete(recursive,
         isNamespaceEnabled) && xMsVersion.compareTo(
-        ApiVersion.AUG_03_2023) < 0)
-        ? createDefaultHeaders(ApiVersion.AUG_03_2023)
+        AbfsHttpConstants.ApiVersion.AUG_03_2023) < 0)
+        ? createDefaultHeaders(AbfsHttpConstants.ApiVersion.AUG_03_2023)
         : createDefaultHeaders();
     final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder();
@@ -1247,6 +1329,18 @@ public String decodeAttribute(byte[] value) throws UnsupportedEncodingException
     return new String(value, XMS_PROPERTIES_ENCODING_ASCII);
   }
 
+  @Override
+  public void takeGetPathStatusAtomicRenameKeyAction(final Path path,
+      final TracingContext tracingContext) throws IOException {
+    // No-op on the DFS endpoint: rename is atomic server-side.
+  }
+
+  @Override
+  public boolean takeListPathAtomicRenameKeyAction(final Path path,
+      final int renamePendingJsonLen, final TracingContext tracingContext) throws IOException {
+    return false;
+  }
+
   private String convertXmsPropertiesToCommaSeparatedString(final Map<String, String> properties)
       throws CharacterCodingException {
     StringBuilder commaSeparatedProperties = new StringBuilder();
acquireMaxRetries, acquireRetryInterval, TimeUnit.SECONDS); - acquireLease(retryPolicy, 0, acquireRetryInterval, 0, + this.timer = new Timer( + String.format("lease-refresh-timer-%s", path), true); + acquireLease(retryPolicy, 0, acquireRetryInterval, 0, eTag, new TracingContext(tracingContext)); while (leaseID == null && exception == null) { @@ -121,19 +135,22 @@ public AbfsLease(AbfsClient client, String path, int acquireMaxRetries, } private void acquireLease(RetryPolicy retryPolicy, int numRetries, - int retryInterval, long delay, TracingContext tracingContext) + int retryInterval, long delay, final String eTag, TracingContext tracingContext) throws LeaseException { LOG.debug("Attempting to acquire lease on {}, retry {}", path, numRetries); if (future != null && !future.isDone()) { throw new LeaseException(ERR_LEASE_FUTURE_EXISTS); } - future = client.schedule(() -> client.acquireLease(path, - INFINITE_LEASE_DURATION, tracingContext), - delay, TimeUnit.SECONDS); - client.addCallback(future, new FutureCallback() { + FutureCallback acquireCallback = new FutureCallback() { @Override public void onSuccess(@Nullable AbfsRestOperation op) { leaseID = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_LEASE_ID); + if (leaseRefreshDuration != INFINITE_LEASE_DURATION) { + leaseTimerTask = new LeaseTimerTask(client, path, + leaseID, tracingContext); + timer.scheduleAtFixedRate(leaseTimerTask, leaseRefreshDuration / 2, + leaseRefreshDuration / 2); + } LOG.debug("Acquired lease {} on {}", leaseID, path); } @@ -145,7 +162,7 @@ public void onFailure(Throwable throwable) { LOG.debug("Failed to acquire lease on {}, retrying: {}", path, throwable); acquireRetryCount++; acquireLease(retryPolicy, numRetries + 1, retryInterval, - retryInterval, tracingContext); + retryInterval, eTag, tracingContext); } else { exception = throwable; } @@ -153,7 +170,21 @@ public void onFailure(Throwable throwable) { exception = throwable; } } - }); + }; + if (!isAsync) { + try { + AbfsRestOperation op = client.acquireLease(path, + INFINITE_LEASE_DURATION, eTag, tracingContext); + acquireCallback.onSuccess(op); + return; + } catch (AzureBlobFileSystemException ex) { + acquireCallback.onFailure(ex); + } + } + future = client.schedule(() -> client.acquireLease(path, + INFINITE_LEASE_DURATION, eTag, tracingContext), + delay, TimeUnit.SECONDS); + client.addCallback(future, acquireCallback); } /** @@ -170,6 +201,7 @@ public void free() { if (future != null && !future.isDone()) { future.cancel(true); } + cancelTimer(); TracingContext tracingContext = new TracingContext(this.tracingContext); tracingContext.setOperation(FSOperationType.RELEASE_LEASE); client.releaseLease(path, leaseID, tracingContext); @@ -184,6 +216,13 @@ public void free() { } } + public void cancelTimer() { + if (leaseTimerTask != null) { + leaseTimerTask.cancel(); + } + timer.purge(); + } + public boolean isFreed() { return leaseFreed; } @@ -201,4 +240,27 @@ public int getAcquireRetryCount() { public TracingContext getTracingContext() { return tracingContext; } + + private static class LeaseTimerTask extends TimerTask { + private final AbfsClient client; + private final String path; + private final String leaseID; + private final TracingContext tracingContext; + + LeaseTimerTask(AbfsClient client, String path, String leaseID, TracingContext tracingContext) { + this.client = client; + this.path = path; + this.leaseID = leaseID; + this.tracingContext = tracingContext; + } + + @Override + public void run() { + try { + client.renewLease(path, leaseID, 
tracingContext);
+      } catch (Exception e) {
+        LOG.error("Failed to renew lease on {}", path, e);
+      }
+    }
+  }
 }
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobDeleteHandler.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobDeleteHandler.java
new file mode 100644
index 0000000000000..ca21656c795a6
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobDeleteHandler.java
@@ -0,0 +1,184 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIOException;
+import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore;
+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
+import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
+
+import static java.net.HttpURLConnection.HTTP_CONFLICT;
+import static java.net.HttpURLConnection.HTTP_NOT_FOUND;
+import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.DIRECTORY_NOT_EMPTY_DELETE;
+import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.PATH_NOT_FOUND;
+
+/**
+ * Orchestrator for delete over the Blob endpoint. The Blob endpoint for a
+ * flat-namespace account does not support directory delete. This class is
+ * responsible for deleting the blobs and creating the parent directory marker
+ * file if needed.
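+ *
+ * A minimal usage sketch (variable names here are illustrative, not part of
+ * the change):
+ * <pre>{@code
+ * BlobDeleteHandler handler = new BlobDeleteHandler(path, recursive,
+ *     blobClient, tracingContext);
+ * boolean deleted = handler.execute();
+ * }</pre>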
+ */
+public class BlobDeleteHandler extends ListActionTaker {
+
+  private static final Logger LOG = LoggerFactory.getLogger(
+      AzureBlobFileSystemStore.class);
+
+  private final Path path;
+
+  private final boolean recursive;
+
+  private boolean nonRecursiveDeleteDirectoryFailed = false;
+
+  private final TracingContext tracingContext;
+
+  private final AtomicInteger deleteCount = new AtomicInteger(0);
+
+
+  public BlobDeleteHandler(final Path path,
+      final boolean recursive,
+      final AbfsBlobClient abfsBlobClient,
+      final TracingContext tracingContext) {
+    super(path, abfsBlobClient, tracingContext);
+    this.path = path;
+    this.recursive = recursive;
+    this.tracingContext = tracingContext;
+  }
+
+  @Override
+  int getMaxConsumptionParallelism() {
+    return abfsClient.getAbfsConfiguration()
+        .getBlobDeleteDirConsumptionParallelism();
+  }
+
+  private boolean deleteInternal(final Path path)
+      throws AzureBlobFileSystemException {
+    abfsClient.deleteBlobPath(path, null, tracingContext);
+    deleteCount.incrementAndGet();
+    return true;
+  }
+
+  /**
+   * Orchestrate the delete operation.
+   *
+   * @return true if the delete operation is successful.
+   * @throws AzureBlobFileSystemException if deletion fails due to a server
+   * error or the path does not exist.
+   */
+  public boolean execute() throws AzureBlobFileSystemException {
+    /*
+     * ABFS is not aware if it's a file or directory. So, we need to list the
+     * path and delete the listed objects. The listing returns the children of
+     * the path and not the path itself.
+     */
+    listRecursiveAndTakeAction();
+    if (nonRecursiveDeleteDirectoryFailed) {
+      throw new AbfsRestOperationException(HTTP_CONFLICT,
+          DIRECTORY_NOT_EMPTY_DELETE.getErrorCode(),
+          DIRECTORY_NOT_EMPTY_DELETE.getErrorMessage(),
+          new PathIOException(path.toString(),
+              "Non-recursive delete of non-empty directory"));
+    }
+    tracingContext.setOperatedBlobCount(deleteCount.get() + 1);
+    /*
+     * Tracks whether the path itself got deleted.
+     */
+    boolean deleted;
+    try {
+      /*
+       * Delete the required path. For a recursive delete, the directory path
+       * itself can be implicit (no marker blob), so it is deleted via
+       * safeDelete, which tolerates a 404 from the server.
+       */
+      deleted = recursive ? safeDelete(path) : deleteInternal(path);
+    } finally {
+      tracingContext.setOperatedBlobCount(null);
+    }
+    if (deleteCount.get() == 0) {
+      /*
+       * DeleteCount can be zero only if the path does not exist.
+       */
+      throw new AbfsRestOperationException(HTTP_NOT_FOUND,
+          PATH_NOT_FOUND.getErrorCode(), PATH_NOT_FOUND.getErrorMessage(),
+          new PathIOException(path.toString(), "Path not found"));
+    }
+
+    /*
+     * Ensure that the parent directory of the deleted path is marked as a
+     * folder. This is required because, if the parent is an implicit directory
+     * (a path with no marker blob) and the given path was its only child, the
+     * parent would otherwise cease to exist.
+     */
+    if (deleted) {
+      ensurePathParentExist();
+    }
+    return deleted;
+  }
+
+  private void ensurePathParentExist()
+      throws AzureBlobFileSystemException {
+    if (!path.isRoot() && !path.getParent().isRoot()) {
+      try {
+        abfsClient.createPath(path.getParent().toUri().getPath(), false, false,
+            null,
+            false, null, null, tracingContext, false);
+      } catch (AbfsRestOperationException ex) {
+        if (ex.getStatusCode() != HTTP_CONFLICT) {
+          throw ex;
+        }
+      }
+    }
+  }
+
+  /**{@inheritDoc}*/
+  @Override
+  boolean takeAction(final Path path) throws AzureBlobFileSystemException {
+    if (!recursive) {
+      /*
+       * If the delete operation is non-recursive, then the path cannot be a
+       * non-empty directory.
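+       * Returning false makes listRecursiveAndTakeAction() report failure,
+       * and the flag set below is surfaced as an HTTP 409 by execute().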
+ */ + nonRecursiveDeleteDirectoryFailed = true; + return false; + } + return safeDelete(path); + } + + /** + * Delete the path if it exists. Gracefully handles the case where the path does not exist. + * + * @param path path to delete. + * @return true if the path is deleted or is not found. + * @throws AzureBlobFileSystemException server error. + */ + private boolean safeDelete(final Path path) + throws AzureBlobFileSystemException { + try { + return deleteInternal(path); + } catch (AbfsRestOperationException ex) { + if (ex.getStatusCode() == HTTP_NOT_FOUND) { + return true; + } + throw ex; + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobRenameHandler.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobRenameHandler.java new file mode 100644 index 0000000000000..bbaea0a5908ea --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobRenameHandler.java @@ -0,0 +1,539 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIOException;
+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
+import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode;
+import org.apache.hadoop.fs.azurebfs.enums.BlobCopyProgress;
+import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
+
+import static org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.extractEtagHeader;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COPY_STATUS_ABORTED;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COPY_STATUS_FAILED;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COPY_STATUS_SUCCESS;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_COPY_ID;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_COPY_SOURCE;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_COPY_STATUS;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_COPY_STATUS_DESCRIPTION;
+import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.COPY_BLOB_ABORTED;
+import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.COPY_BLOB_FAILED;
+import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND;
+
+/**
+ * Orchestrator for rename over the Blob endpoint. Handles both directory and
+ * file renames. The Blob endpoint does not expose a rename API; this class is
+ * responsible for copying the blobs and then deleting the source blobs.
+ *
+ * For directory rename, it recursively lists the blobs in the source directory
+ * and copies them to the destination directory.
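+ *
+ * A minimal usage sketch (variable names here are illustrative):
+ * <pre>{@code
+ * BlobRenameHandler handler = new BlobRenameHandler(src, dst, blobClient,
+ *     srcEtag, isAtomicRename, false, tracingContext);
+ * AbfsClientRenameResult result = handler.execute();
+ * }</pre>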
+ */
+public class BlobRenameHandler extends ListActionTaker {
+
+  public static final Logger LOG = LoggerFactory.getLogger(AbfsClient.class);
+
+  private final String srcEtag;
+
+  private final Path src, dst;
+
+  private final boolean isAtomicRename, isAtomicRenameRecovery;
+
+  private final TracingContext tracingContext;
+
+  private AbfsLease srcAbfsLease;
+
+  private String srcLeaseId;
+
+  private final List<AbfsLease> leases = new ArrayList<>();
+
+  private final AtomicInteger operatedBlobCount = new AtomicInteger(0);
+
+  public BlobRenameHandler(final String src,
+      final String dst,
+      final AbfsBlobClient abfsClient,
+      final String srcEtag,
+      final boolean isAtomicRename,
+      final boolean isAtomicRenameRecovery,
+      final TracingContext tracingContext) {
+    super(new Path(src), abfsClient, tracingContext);
+    this.srcEtag = srcEtag;
+    this.tracingContext = tracingContext;
+    this.src = new Path(src);
+    this.dst = new Path(dst);
+    this.isAtomicRename = isAtomicRename;
+    this.isAtomicRenameRecovery = isAtomicRenameRecovery;
+  }
+
+  @Override
+  int getMaxConsumptionParallelism() {
+    return abfsClient.getAbfsConfiguration()
+        .getBlobRenameDirConsumptionParallelism();
+  }
+
+  /**
+   * Orchestrates the rename operation.
+   */
+  public AbfsClientRenameResult execute() throws IOException {
+    PathInformation pathInformation = new PathInformation();
+    boolean result = false;
+    if (preCheck(src, dst, pathInformation)) {
+      RenameAtomicity renameAtomicity = null;
+      try {
+        if (isAtomicRename) {
+          /*
+           * Conditionally get a lease on the source blob to prevent other writers
+           * from changing it. This is used for correctness in HBase when log files
+           * are renamed. When the HBase master renames a log file folder, the lease
+           * locks out other writers. This prevents a region server that the master
+           * thinks is dead, but is still alive, from committing additional updates.
+           * This is different than when HBase runs on HDFS, where the region server
+           * recovers the lease on a log file, to gain exclusive access to it, before
+           * it splits it.
+           */
+          srcAbfsLease = takeLease(src, srcEtag);
+          srcLeaseId = srcAbfsLease.getLeaseID();
+          if (!isAtomicRenameRecovery && pathInformation.getIsDirectory()) {
+            /*
+             * If it is not a resume of a previous failed atomic rename operation,
+             * perform the pre-rename operation.
+             */
+            renameAtomicity = getRenameAtomicity(pathInformation);
+            renameAtomicity.preRename();
+          }
+        }
+        if (pathInformation.getIsDirectory()) {
+          result = listRecursiveAndTakeAction() && finalSrcRename();
+        } else {
+          result = renameInternal(src, dst);
+        }
+      } finally {
+        if (srcAbfsLease != null) {
+          // If the operation succeeded, only cancel the refresh timer; the
+          // lease need not be released, since the delete of the source
+          // blob-path has already taken place.
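+          // Otherwise, free() both cancels the timer and releases the lease.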
+          if (result) {
+            srcAbfsLease.cancelTimer();
+          } else {
+            srcAbfsLease.free();
+          }
+        }
+      }
+      if (result && renameAtomicity != null) {
+        renameAtomicity.postRename();
+      }
+      return new AbfsClientRenameResult(null, result, false);
+    } else {
+      return new AbfsClientRenameResult(null, false, false);
+    }
+  }
+
+  private boolean finalSrcRename() throws IOException {
+    tracingContext.setOperatedBlobCount(operatedBlobCount.get() + 1);
+    try {
+      return renameInternal(src, dst);
+    } finally {
+      tracingContext.setOperatedBlobCount(null);
+    }
+  }
+
+  @VisibleForTesting
+  public RenameAtomicity getRenameAtomicity(final PathInformation pathInformation)
+      throws IOException {
+    return new RenameAtomicity(src,
+        dst,
+        new Path(src.getParent(), src.getName() + RenameAtomicity.SUFFIX),
+        tracingContext,
+        pathInformation.getETag(),
+        abfsClient);
+  }
+
+  private AbfsLease takeLease(final Path path, final String eTag)
+      throws AzureBlobFileSystemException {
+    AbfsLease lease = new AbfsLease(abfsClient, path.toUri().getPath(), false,
+        abfsClient.getAbfsConfiguration()
+            .getAtomicRenameLeaseRefreshDuration(),
+        eTag, tracingContext);
+    leases.add(lease);
+    return lease;
+  }
+
+  private boolean containsColon(Path p) {
+    return p.toUri().getPath().contains(":");
+  }
+
+  /**
+   * Since the server does not expose a rename API and cannot check HDFS
+   * contracts, the client has to ensure that no HDFS contract is violated.
+   *
+   * @param src source path
+   * @param dst destination path
+   * @param pathInformation object in which the path information of the source
+   * path is stored
+   *
+   * @return true if the pre-checks pass
+   * @throws AzureBlobFileSystemException if a server call fails or the given
+   * paths are invalid.
+   */
+  private boolean preCheck(final Path src, final Path dst,
+      final PathInformation pathInformation)
+      throws AzureBlobFileSystemException {
+    validateDestinationPath(src, dst);
+
+    setSrcPathInformation(src, pathInformation);
+    validateSourcePath(pathInformation);
+    validateDestinationPathNotExist(src, dst, pathInformation);
+    validateDestinationParentExist(src, dst, pathInformation);
+
+    return true;
+  }
+
+  /**
+   * Validate that the format of the destination path is correct and that the
+   * destination path is not a sub-directory of the source path.
+   *
+   * @param src source path
+   * @param dst destination path
+   *
+   * @throws AbfsRestOperationException if the destination path is invalid
+   */
+  private void validateDestinationPath(final Path src, final Path dst)
+      throws AbfsRestOperationException {
+    if (containsColon(dst)) {
+      throw new AbfsRestOperationException(
+          HttpURLConnection.HTTP_BAD_REQUEST,
+          AzureServiceErrorCode.INVALID_RENAME_DESTINATION.getErrorCode(), null,
+          new PathIOException(dst.toUri().getPath(),
+              "Destination path contains colon"));
+    }
+
+    validateDestinationIsNotSubDir(src, dst);
+  }
+
+  /**
+   * Validate that the destination path is not a sub-directory of the source
+   * path.
+   *
+   * @param src source path
+   * @param dst destination path
+   */
+  private void validateDestinationIsNotSubDir(final Path src,
+      final Path dst) throws AbfsRestOperationException {
+    LOG.debug("Check if the destination is subDirectory");
+    Path nestedDstParent = dst.getParent();
+    if (nestedDstParent != null && nestedDstParent.toUri()
+        .getPath()
+        .indexOf(src.toUri().getPath()) == 0) {
+      LOG.info("Rename src: {} dst: {} failed as dst is subDir of src",
+          src, dst);
+      throw new AbfsRestOperationException(HttpURLConnection.HTTP_CONFLICT,
+          AzureServiceErrorCode.INVALID_RENAME_SOURCE_PATH.getErrorCode(),
+          AzureServiceErrorCode.INVALID_RENAME_SOURCE_PATH.getErrorMessage(),
+          new Exception(
+              AzureServiceErrorCode.INVALID_RENAME_SOURCE_PATH.getErrorCode()));
+    }
+  }
+
+  private void setSrcPathInformation(final Path src,
+      final PathInformation pathInformation)
+      throws AzureBlobFileSystemException {
+    pathInformation.copy(getPathInformation(src, tracingContext));
+  }
+
+  /**
+   * Validate that the source path exists and, if the client knows the ETag of
+   * the source path, that the ETag matches the one on the server.
+   *
+   * @param pathInformation object containing the path information of the source path
+   *
+   * @throws AbfsRestOperationException if the source path is not found or if
+   * the ETag of the source path does not match the one on the server.
+   */
+  private void validateSourcePath(final PathInformation pathInformation)
+      throws AzureBlobFileSystemException {
+    if (!pathInformation.getPathExists()) {
+      throw new AbfsRestOperationException(
+          HttpURLConnection.HTTP_NOT_FOUND,
+          AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND.getErrorCode(), null,
+          new Exception(
+              AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND.getErrorCode()));
+    }
+    if (srcEtag != null && !srcEtag.equals(pathInformation.getETag())) {
+      throw new AbfsRestOperationException(
+          HttpURLConnection.HTTP_CONFLICT,
+          AzureServiceErrorCode.PATH_ALREADY_EXISTS.getErrorCode(), null,
+          new Exception(
+              AzureServiceErrorCode.PATH_ALREADY_EXISTS.getErrorCode()));
+    }
+  }
+
+  private void validateDestinationPathNotExist(final Path src,
+      final Path dst,
+      final PathInformation pathInformation)
+      throws AzureBlobFileSystemException {
+    /*
+     * The destination path name can be the same as the source path name only
+     * in the case of a directory rename.
+     *
+     * In case the directory is being renamed to some other name, the destination
+     * check happens in the AzureBlobFileSystem#rename method.
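+     *
+     * For example, renaming /a/src to /b/src (same name, new parent) is
+     * checked here, while renaming /a/src to /a/dst is checked by the caller.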
+     */
+    if (pathInformation.getIsDirectory() && dst.getName()
+        .equals(src.getName())) {
+      PathInformation dstPathInformation = getPathInformation(
+          dst,
+          tracingContext);
+      if (dstPathInformation.getPathExists()) {
+        LOG.info(
+            "Rename src: {} dst: {} failed as qualifiedDst already exists",
+            src, dst);
+        throw new AbfsRestOperationException(
+            HttpURLConnection.HTTP_CONFLICT,
+            AzureServiceErrorCode.PATH_ALREADY_EXISTS.getErrorCode(), null,
+            null);
+      }
+    }
+  }
+
+  private void validateDestinationParentExist(final Path src,
+      final Path dst,
+      final PathInformation pathInformation)
+      throws AzureBlobFileSystemException {
+    final Path nestedDstParent = dst.getParent();
+    if (!dst.isRoot() && nestedDstParent != null && !nestedDstParent.isRoot()
+        && (
+        !pathInformation.getIsDirectory() || !dst.getName()
+            .equals(src.getName()))) {
+      PathInformation nestedDstInfo = getPathInformation(
+          nestedDstParent,
+          tracingContext);
+      if (!nestedDstInfo.getPathExists() || !nestedDstInfo.getIsDirectory()) {
+        throw new AbfsRestOperationException(
+            HttpURLConnection.HTTP_NOT_FOUND,
+            RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode(), null,
+            new Exception(
+                RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode()));
+      }
+    }
+  }
+
+  @Override
+  boolean takeAction(final Path path) throws AzureBlobFileSystemException {
+    return renameInternal(path,
+        createDestinationPathForBlobPartOfRenameSrcDir(dst, path, src));
+  }
+
+  private boolean renameInternal(final Path path,
+      final Path destinationPathForBlobPartOfRenameSrcDir)
+      throws AzureBlobFileSystemException {
+    final String leaseId;
+    AbfsLease abfsLease = null;
+    if (isAtomicRename) {
+      /*
+       * To maintain atomicity of the rename of the path, a lease is taken on
+       * the path.
+       */
+      if (path.equals(src)) {
+        abfsLease = srcAbfsLease;
+        leaseId = srcLeaseId;
+      } else {
+        abfsLease = takeLease(path, null);
+        leaseId = abfsLease.getLeaseID();
+      }
+    } else {
+      leaseId = null;
+    }
+    boolean operated = false;
+    try {
+      copyPath(path, destinationPathForBlobPartOfRenameSrcDir, leaseId);
+      abfsClient.deleteBlobPath(path, leaseId, tracingContext);
+      operated = true;
+    } finally {
+      if (abfsLease != null) {
+        // If the operation succeeded, only cancel the refresh timer; the
+        // lease need not be released, since the delete of the source
+        // blob-path has already taken place.
+        if (operated) {
+          abfsLease.cancelTimer();
+        } else {
+          abfsLease.free();
+        }
+      }
+    }
+    operatedBlobCount.incrementAndGet();
+    return true;
+  }
+
+  private void copyPath(final Path src, final Path dst, final String leaseId)
+      throws AzureBlobFileSystemException {
+    String copyId;
+    try {
+      AbfsRestOperation copyPathOp = abfsClient.copyBlob(src, dst, leaseId,
+          tracingContext);
+      final String progress = copyPathOp.getResult()
+          .getResponseHeader(X_MS_COPY_STATUS);
+      if (COPY_STATUS_SUCCESS.equalsIgnoreCase(progress)) {
+        return;
+      }
+      copyId = copyPathOp.getResult()
+          .getResponseHeader(X_MS_COPY_ID);
+    } catch (AbfsRestOperationException ex) {
+      if (ex.getStatusCode() == HttpURLConnection.HTTP_CONFLICT) {
+        AbfsRestOperation dstPathStatus = abfsClient.getPathStatus(
+            dst.toUri().getPath(),
+            tracingContext, null, false);
+        final String srcCopyPath = ROOT_PATH + abfsClient.getFileSystem()
+            + src.toUri().getPath();
+        if (dstPathStatus.getResult() != null && (srcCopyPath.equals(
+            getDstSource(dstPathStatus)))) {
+          return;
+        }
+      }
+      throw ex;
+    }
+    final long pollWait = abfsClient.getAbfsConfiguration()
+        .getBlobCopyProgressPollWaitMillis();
+    while (handleCopyInProgress(dst, tracingContext, copyId)
+        == BlobCopyProgress.PENDING) {
+      try {
+        Thread.sleep(pollWait);
+      } catch (InterruptedException ignored) {
+
+      }
+    }
+  }
+
+  private String getDstSource(final AbfsRestOperation dstPathStatus) {
+    try {
+      String responseHeader = dstPathStatus.getResult()
+          .getResponseHeader(X_MS_COPY_SOURCE);
+      if (responseHeader == null) {
+        return null;
+      }
+      return new URL(responseHeader).toURI().getPath();
+    } catch (URISyntaxException | MalformedURLException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Verifies whether the blob copy has succeeded, failed, or is still in
+   * progress.
+   *
+   * @param dstPath path of the destination for the copying
+   * @param tracingContext object of tracingContext used for the tracing of the
+   * server calls.
+   * @param copyId id returned by server on the copy server-call. This id gets
+   * attached to blob and is returned by GetBlobProperties API on the destination.
+   *
+   * @return BlobCopyProgress.SUCCESS if the copy succeeded;
+   * BlobCopyProgress.PENDING if the copy is still in progress.
+   *
+   * @throws AzureBlobFileSystemException exception returned in making the
+   * server call for GetBlobProperties on the path. It is also thrown if the
+   * copy status is failed or aborted.
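+   *
+   * Callers poll this method until a terminal state is reached, as
+   * copyPath() does:
+   * <pre>{@code
+   * while (handleCopyInProgress(dst, tracingContext, copyId)
+   *     == BlobCopyProgress.PENDING) {
+   *   Thread.sleep(pollWaitMillis);
+   * }
+   * }</pre>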
+ */ + @VisibleForTesting + public BlobCopyProgress handleCopyInProgress(final Path dstPath, + final TracingContext tracingContext, + final String copyId) throws AzureBlobFileSystemException { + AbfsRestOperation op = abfsClient.getPathStatus(dstPath.toUri().getPath(), + tracingContext, null, false); + + if (op.getResult() != null && copyId.equals( + op.getResult().getResponseHeader(X_MS_COPY_ID))) { + final String copyStatus = op.getResult() + .getResponseHeader(X_MS_COPY_STATUS); + if (COPY_STATUS_SUCCESS.equalsIgnoreCase(copyStatus)) { + return BlobCopyProgress.SUCCESS; + } + if (COPY_STATUS_FAILED.equalsIgnoreCase(copyStatus)) { + throw new AbfsRestOperationException( + COPY_BLOB_FAILED.getStatusCode(), COPY_BLOB_FAILED.getErrorCode(), + String.format("copy to path %s failed due to: %s", + dstPath.toUri().getPath(), + op.getResult().getResponseHeader(X_MS_COPY_STATUS_DESCRIPTION)), + new Exception(COPY_BLOB_FAILED.getErrorCode())); + } + if (COPY_STATUS_ABORTED.equalsIgnoreCase(copyStatus)) { + throw new AbfsRestOperationException( + COPY_BLOB_ABORTED.getStatusCode(), COPY_BLOB_ABORTED.getErrorCode(), + String.format("copy to path %s aborted", dstPath.toUri().getPath()), + new Exception(COPY_BLOB_ABORTED.getErrorCode())); + } + } + return BlobCopyProgress.PENDING; + } + + /** + * Translates the destination path for a blob part of a source directory getting + * renamed. + * + * @param destinationDir destination directory for the rename operation + * @param blobPath path of blob inside sourceDir being renamed. + * @param sourceDir source directory for the rename operation + * + * @return translated path for the blob + */ + private Path createDestinationPathForBlobPartOfRenameSrcDir(final Path destinationDir, + final Path blobPath, final Path sourceDir) { + String destinationPathStr = destinationDir.toUri().getPath(); + String sourcePathStr = sourceDir.toUri().getPath(); + String srcBlobPropertyPathStr = blobPath.toUri().getPath(); + if (sourcePathStr.equals(srcBlobPropertyPathStr)) { + return destinationDir; + } + return new Path( + destinationPathStr + ROOT_PATH + srcBlobPropertyPathStr.substring( + sourcePathStr.length())); + } + + private PathInformation getPathInformation(Path path, + TracingContext tracingContext) + throws AzureBlobFileSystemException { + try { + AbfsRestOperation op = abfsClient.getPathStatus(path.toString(), false, + tracingContext, null); + + return new PathInformation(true, + abfsClient.checkIsDir(op.getResult()), + extractEtagHeader(op.getResult())); + } catch (AzureBlobFileSystemException e) { + if (e instanceof AbfsRestOperationException) { + AbfsRestOperationException ex = (AbfsRestOperationException) e; + if (ex.getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND) { + return new PathInformation(false, false, null); + } + } + throw e; + } + } + + @VisibleForTesting + public List getLeases() { + return leases; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java new file mode 100644 index 0000000000000..d40e18681271a --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListActionTaker.java @@ -0,0 +1,192 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.services.BlobListResultSchema; +import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultEntrySchema; +import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH; + +/** + * ListActionTaker is an abstract class that provides a way to list the paths + * recursively and take action on each path. The implementations of this class + * should provide the action to be taken on each listed path. 
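+ *
+ * A minimal subclass sketch (illustrative only; not part of the change):
+ * <pre>{@code
+ * class NoOpActionTaker extends ListActionTaker {
+ *   NoOpActionTaker(Path path, AbfsBlobClient client, TracingContext tc) {
+ *     super(path, client, tc);
+ *   }
+ *   @Override
+ *   int getMaxConsumptionParallelism() { return 1; }
+ *   @Override
+ *   boolean takeAction(Path p) {
+ *     // act on the listed path p
+ *     return true;
+ *   }
+ * }
+ * }</pre>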
+ */
+public abstract class ListActionTaker {
+
+  private static final Logger LOG = LoggerFactory.getLogger(ListActionTaker.class);
+
+  protected final Path path;
+
+  protected final AbfsBlobClient abfsClient;
+
+  protected final TracingContext tracingContext;
+
+  private final ExecutorService executorService;
+
+  private final AtomicBoolean producerThreadToBeStopped = new AtomicBoolean(
+      false);
+
+  public ListActionTaker(Path path,
+      AbfsBlobClient abfsClient,
+      TracingContext tracingContext) {
+    this.path = path;
+    this.abfsClient = abfsClient;
+    this.tracingContext = tracingContext;
+    executorService = Executors.newFixedThreadPool(
+        getMaxConsumptionParallelism());
+  }
+
+  abstract int getMaxConsumptionParallelism();
+
+  abstract boolean takeAction(Path path) throws AzureBlobFileSystemException;
+
+  private boolean takeAction(List<Path> paths) throws AzureBlobFileSystemException {
+    List<Future<Boolean>> futureList = new ArrayList<>();
+    for (Path path : paths) {
+      Future<Boolean> future = executorService.submit(() -> {
+        return takeAction(path);
+      });
+      futureList.add(future);
+    }
+
+    AzureBlobFileSystemException executionException = null;
+    boolean actionResult = true;
+    for (Future<Boolean> future : futureList) {
+      try {
+        Boolean result = future.get();
+        if (!result) {
+          actionResult = false;
+        }
+      } catch (InterruptedException e) {
+        LOG.debug("Thread interrupted while taking action on path: {}",
+            path.toUri().getPath());
+      } catch (ExecutionException e) {
+        executionException = (AzureBlobFileSystemException) e.getCause();
+      }
+    }
+    if (executionException != null) {
+      throw executionException;
+    }
+    return actionResult;
+  }
+
+  /**
+   * Spawns a producer thread that lists the children of the path recursively
+   * and enqueues them into a {@link ListBlobQueue}. On the main thread, it
+   * dequeues the paths and supplies them to parallel threads for the action
+   * defined in {@link #takeAction(Path)}.
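+   *
+   * @return true if {@link #takeAction(Path)} returned true for every
+   * consumed path, false otherwise.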
+ */ + public boolean listRecursiveAndTakeAction() throws AzureBlobFileSystemException { + AbfsConfiguration configuration = abfsClient.getAbfsConfiguration(); + Thread producerThread = null; + try { + ListBlobQueue listBlobQueue = new ListBlobQueue( + configuration.getProducerQueueMaxSize(), getMaxConsumptionParallelism()); + producerThread = new Thread(() -> { + try { + produceConsumableList(listBlobQueue); + } catch (AzureBlobFileSystemException e) { + listBlobQueue.markProducerFailure(e); + } + }); + producerThread.start(); + + while (!listBlobQueue.getIsCompleted()) { + List paths = listBlobQueue.consume(); + if (paths == null) { + continue; + } + try { + boolean resultOnPartAction = takeAction(paths); + if (!resultOnPartAction) { + return false; + } + } catch (AzureBlobFileSystemException parallelConsumptionException) { + listBlobQueue.markConsumptionFailed(); + throw parallelConsumptionException; + } + } + return true; + } finally { + if (producerThread != null) { + producerThreadToBeStopped.set(true); + } + executorService.shutdownNow(); + } + } + + private void produceConsumableList(final ListBlobQueue listBlobQueue) + throws AzureBlobFileSystemException { + String continuationToken = null; + do { + List paths = new ArrayList<>(); + final int queueAvailableSize = listBlobQueue.availableSize(); + if (queueAvailableSize == 0) { + break; + } + final AbfsRestOperation op; + try { + op = abfsClient.listPath(path.toUri().getPath(), + true, + queueAvailableSize, continuationToken, + tracingContext); + } catch (AzureBlobFileSystemException ex) { + throw ex; + } catch (IOException ex) { + throw new AbfsRestOperationException(-1, null, + "Unknown exception from listing: " + ex.getMessage(), ex); + } + + ListResultSchema retrievedSchema = op.getResult().getListResultSchema(); + if (retrievedSchema == null) { + continue; + } + continuationToken + = ((BlobListResultSchema) retrievedSchema).getNextMarker(); + for (ListResultEntrySchema entry : retrievedSchema.paths()) { + Path entryPath = new Path(ROOT_PATH, entry.name()); + if (!entryPath.equals(this.path)) { + paths.add(entryPath); + } + } + listBlobQueue.enqueue(paths); + } while (!producerThreadToBeStopped.get() && continuationToken != null + && !listBlobQueue.getConsumptionFailed()); + listBlobQueue.complete(); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobQueue.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobQueue.java new file mode 100644 index 0000000000000..40f8a2a4cd94a --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobQueue.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Queue;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
+
+/**
+ * Data-structure to hold the list of paths to be processed. The paths are
+ * enqueued by the producer and dequeued by the consumer. The producer can
+ * enqueue the paths until the queue is full. The consumer can consume the paths
+ * until the queue is empty. The producer can mark the queue as completed once
+ * all the paths are enqueued and there are no more paths that can be returned
+ * from the server. The consumer can mark the queue as failed if it encounters
+ * any exception while consuming the paths.
+ */
+class ListBlobQueue {
+
+  private final Queue<Path> pathQueue = new ArrayDeque<>();
+
+  private final int maxSize;
+
+  private final int consumeSetSize;
+
+  private volatile boolean isCompleted = false;
+
+  private volatile boolean isConsumptionFailed = false;
+
+  private volatile AzureBlobFileSystemException failureFromProducer;
+
+  ListBlobQueue(int maxSize, int consumeSetSize) {
+    this.maxSize = maxSize;
+    this.consumeSetSize = consumeSetSize;
+  }
+
+  void markProducerFailure(AzureBlobFileSystemException failure) {
+    failureFromProducer = failure;
+  }
+
+  void complete() {
+    isCompleted = true;
+  }
+
+  synchronized void markConsumptionFailed() {
+    isConsumptionFailed = true;
+    notify();
+  }
+
+  boolean getConsumptionFailed() {
+    return isConsumptionFailed;
+  }
+
+  boolean getIsCompleted() {
+    return isCompleted && size() == 0;
+  }
+
+  private AzureBlobFileSystemException getException() {
+    return failureFromProducer;
+  }
+
+  synchronized void enqueue(List<Path> pathList) {
+    if (isCompleted) {
+      throw new IllegalStateException(
+          "Cannot enqueue paths as the queue is already marked as completed");
+    }
+    pathQueue.addAll(pathList);
+  }
+
+  synchronized List<Path> consume() throws AzureBlobFileSystemException {
+    AzureBlobFileSystemException exception = getException();
+    if (exception != null) {
+      throw exception;
+    }
+    return dequeue();
+  }
+
+  private List<Path> dequeue() {
+    List<Path> pathListForConsumption = new ArrayList<>();
+    int counter = 0;
+    while (counter < consumeSetSize && pathQueue.size() > 0) {
+      pathListForConsumption.add(pathQueue.poll());
+      counter++;
+    }
+    if (counter > 0) {
+      notify();
+    }
+    return pathListForConsumption;
+  }
+
+  private synchronized int size() {
+    return pathQueue.size();
+  }
+
+  /**
+   * Returns the available size of the queue. This is calculated by subtracting
+   * the current size of the queue from its maximum size. If the queue is full,
+   * this method waits until some elements are consumed and space becomes
+   * available. If consumption has failed, it immediately returns zero. This
+   * method is synchronized to prevent concurrent modifications of the queue.
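+   *
+   * For example, with a maxSize of 10 and 10 paths already queued, a producer
+   * calling this method blocks until a consumer dequeues paths via consume(),
+   * which calls notify().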
+ * + * @return the available size of the queue + */ + synchronized int availableSize() { + while(maxSize - size() <= 0) { + if(isConsumptionFailed) { + return 0; + } + try { + wait(); + } catch (InterruptedException ignored) { + } + } + return maxSize - size(); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListingSupport.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListingSupport.java index dc070a1d405d8..f3c08c4a30036 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListingSupport.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListingSupport.java @@ -75,7 +75,7 @@ public interface ListingSupport { * result. * @param continuation Contiuation token. null means start rom the begining. * @param tracingContext TracingContext instance to track identifiers - * @return Continuation tokem + * @return Continuation token * @throws IOException in case of error */ String listStatus(Path path, String startFrom, List fileStatuses, diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/PathInformation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/PathInformation.java new file mode 100644 index 0000000000000..dcb1f0d8c66cb --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/PathInformation.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +public class PathInformation { + private Boolean pathExists; + private Boolean isDirectory; + private String eTag; + + public PathInformation(Boolean pathExists, Boolean isDirectory, String eTag) { + this.pathExists = pathExists; + this.isDirectory = isDirectory; + this.eTag = eTag; + } + + public PathInformation() { + } + + public void copy(PathInformation pathInformation) { + this.pathExists = pathInformation.getPathExists(); + this.isDirectory = pathInformation.getIsDirectory(); + this.eTag = pathInformation.getETag(); + } + + public String getETag() { + return eTag; + } + + public Boolean getPathExists() { + return pathExists; + } + + public Boolean getIsDirectory() { + return isDirectory; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RenameAtomicity.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RenameAtomicity.java new file mode 100644 index 0000000000000..4daa03be0c234 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/RenameAtomicity.java @@ -0,0 +1,349 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; + +import java.util.Collections; +import java.util.Random; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; + +import static java.net.HttpURLConnection.HTTP_NOT_FOUND; +import static java.net.HttpURLConnection.HTTP_PRECON_FAILED; +import static org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.extractEtagHeader; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.BLOCK_ID_LENGTH; +import static org.apache.hadoop.fs.azurebfs.services.AzureIngressHandler.generateBlockListXml; + +/** + * For a directory enabled for atomic-rename, before rename starts, a file with + * -RenamePending.json suffix is created. In this file, the states required for the + * rename operation are given. 
This file is created by the {@link #preRename()} method. This matters
+ * if the JVM process crashes during rename: atomicity will still be
+ * maintained when the job calls {@link AzureBlobFileSystem#listStatus(Path)}
+ * or {@link AzureBlobFileSystem#getFileStatus(Path)}. On these API calls to
+ * the filesystem, it is checked whether any RenamePending JSON file exists.
+ * If yes, the crashed rename operation is resumed as per the file.
+ */
+public class RenameAtomicity {
+
+  private final TracingContext tracingContext;
+
+  private Path src, dst;
+
+  private String srcEtag;
+
+  private final AbfsBlobClient abfsClient;
+
+  private final Path renameJsonPath;
+
+  public static final String SUFFIX = "-RenamePending.json";
+
+  private int preRenameRetryCount = 0;
+
+  private int renamePendingJsonLen;
+
+  private static final ObjectMapper objectMapper = new ObjectMapper();
+
+  /**
+   * Performs pre-rename operations. Creates a file with -RenamePending.json
+   * suffix in the source parent directory. This file contains the states
+   * required for the rename operation.
+   *
+   * @param src Source path
+   * @param dst Destination path
+   * @param renameJsonPath Path of the JSON file to be created
+   * @param tracingContext Tracing context
+   * @param srcEtag ETag of the source directory
+   * @param abfsClient AbfsClient instance
+   */
+  public RenameAtomicity(final Path src, final Path dst,
+      final Path renameJsonPath,
+      TracingContext tracingContext,
+      final String srcEtag,
+      final AbfsClient abfsClient) {
+    this.src = src;
+    this.dst = dst;
+    this.abfsClient = (AbfsBlobClient) abfsClient;
+    this.renameJsonPath = renameJsonPath;
+    this.tracingContext = tracingContext;
+    this.srcEtag = srcEtag;
+  }
+
+  /**
+   * Resumes the rename operation from the JSON file.
+   *
+   * @param renameJsonPath Path of the JSON file
+   * @param tracingContext Tracing context
+   * @param srcEtag ETag of the source directory
+   * @param abfsClient AbfsClient instance
+   */
+  public RenameAtomicity(final Path renameJsonPath,
+      final int renamePendingJsonFileLen,
+      TracingContext tracingContext,
+      final String srcEtag,
+      final AbfsClient abfsClient) {
+    this.abfsClient = (AbfsBlobClient) abfsClient;
+    this.renameJsonPath = renameJsonPath;
+    this.tracingContext = tracingContext;
+    this.srcEtag = srcEtag;
+    this.renamePendingJsonLen = renamePendingJsonFileLen;
+  }
+
+  /**
+   * Redo the rename operation from the JSON file.
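+   *
+   * A minimal recovery sketch (values here are illustrative):
+   * <pre>{@code
+   * new RenameAtomicity(renameJsonPath, renamePendingJsonLen,
+   *     tracingContext, srcEtag, abfsClient).redo();
+   * }</pre>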
+   */
+  public void redo() throws IOException {
+    byte[] buffer = readRenamePendingJson(renameJsonPath, renamePendingJsonLen);
+    String contents = new String(buffer, Charset.defaultCharset());
+    try {
+      final RenamePendingJsonFormat renamePendingJsonFormatObj;
+      try {
+        renamePendingJsonFormatObj = objectMapper.readValue(contents,
+            RenamePendingJsonFormat.class);
+      } catch (JsonProcessingException e) {
+        return;
+      }
+      if (renamePendingJsonFormatObj != null && StringUtils.isNotEmpty(
+          renamePendingJsonFormatObj.getOldFolderName())
+          && StringUtils.isNotEmpty(
+          renamePendingJsonFormatObj.getNewFolderName())
+          && StringUtils.isNotEmpty(renamePendingJsonFormatObj.getETag())) {
+        this.src = new Path(renamePendingJsonFormatObj.getOldFolderName());
+        this.dst = new Path(renamePendingJsonFormatObj.getNewFolderName());
+        this.srcEtag = renamePendingJsonFormatObj.getETag();
+
+        BlobRenameHandler blobRenameHandler = new BlobRenameHandler(
+            this.src.toUri().getPath(), dst.toUri().getPath(),
+            abfsClient, srcEtag, true, true, tracingContext);
+
+        blobRenameHandler.execute();
+      }
+    } finally {
+      deleteRenamePendingJson();
+    }
+  }
+
+  @VisibleForTesting
+  byte[] readRenamePendingJson(Path path, int len)
+      throws AzureBlobFileSystemException {
+    byte[] bytes = new byte[len];
+    abfsClient.read(path.toUri().getPath(), 0, bytes, 0,
+        len, null, null, null,
+        tracingContext);
+    return bytes;
+  }
+
+  @VisibleForTesting
+  void createRenamePendingJson(Path path, byte[] bytes)
+      throws AzureBlobFileSystemException {
+    // PutBlob on the path.
+    AbfsRestOperation putBlobOp = abfsClient.createPath(path.toUri().getPath(),
+        true,
+        true, null, false, null, null, tracingContext, false);
+    String eTag = extractEtagHeader(putBlobOp.getResult());
+
+    // PutBlock on the path.
+    byte[] blockIdByteArray = new byte[BLOCK_ID_LENGTH];
+    new Random().nextBytes(blockIdByteArray);
+    String blockId = new String(Base64.encodeBase64(blockIdByteArray),
+        StandardCharsets.UTF_8);
+    AppendRequestParameters appendRequestParameters
+        = new AppendRequestParameters(0, 0,
+        bytes.length, AppendRequestParameters.Mode.APPEND_MODE, false, null,
+        abfsClient.getAbfsConfiguration().isExpectHeaderEnabled(), blockId,
+        eTag);
+
+    abfsClient.append(path.toUri().getPath(), bytes,
+        appendRequestParameters, null, null, tracingContext);
+
+    // PutBlockList on the path.
+    String blockList = generateBlockListXml(Collections.singleton(blockId));
+    abfsClient.flush(blockList.getBytes(StandardCharsets.UTF_8),
+        path.toUri().getPath(), true, null, null, eTag, null, tracingContext);
+  }
+
+  /**
+   * Before starting the atomic rename, create a file with the
+   * -RenamePending.json suffix in the source parent directory. This file
+   * contains the required state for the rename operation: source, destination,
+   * and source eTag.
+   *
+   * If the path that is getting renamed is a /sourcePath, then the JSON file
+   * will be /sourcePath-RenamePending.json.
+   *
+   * @return Length of the JSON file.
+   */
+  @VisibleForTesting
+  public int preRename() throws IOException {
+    String makeRenamePendingFileContents = makeRenamePendingFileContents(
+        srcEtag);
+
+    try {
+      createRenamePendingJson(renameJsonPath,
+          makeRenamePendingFileContents.getBytes(StandardCharsets.UTF_8));
+      return makeRenamePendingFileContents.length();
+    } catch (IOException e) {
+      /*
+       * Scenario: the file has been deleted by a parallel thread before the
+       * RenameJSON could be written and flushed. In such a case, there has to
+       * be one retry of preRename.
+ * ref: https://issues.apache.org/jira/browse/HADOOP-12678 + * On DFS endpoint, flush API is called. If file is not there, server returns + * 404. + * On blob endpoint, flush API is not there. PutBlockList is called with + * if-match header. If file is not there, the conditional header will fail, + * the server will return 412. + */ + if (isPreRenameRetriableException(e)) { + preRenameRetryCount++; + if (preRenameRetryCount == 1) { + return preRename(); + } + } + throw e; + } + } + + private boolean isPreRenameRetriableException(IOException e) { + AbfsRestOperationException ex; + while (e != null) { + if (e instanceof AbfsRestOperationException) { + ex = (AbfsRestOperationException) e; + return ex.getStatusCode() == HTTP_NOT_FOUND + || ex.getStatusCode() == HTTP_PRECON_FAILED; + } + e = (IOException) e.getCause(); + } + return false; + } + + public void postRename() throws IOException { + deleteRenamePendingJson(); + } + + private void deleteRenamePendingJson() throws AzureBlobFileSystemException { + try { + abfsClient.deleteBlobPath(renameJsonPath, null, + tracingContext); + } catch (AzureBlobFileSystemException e) { + if (e instanceof AbfsRestOperationException + && ((AbfsRestOperationException) e).getStatusCode() + == HTTP_NOT_FOUND) { + return; + } + throw e; + } + } + + + /** + * Return the contents of the JSON file to represent the operations + * to be performed for a folder rename. + * + * @return JSON string which represents the operation. + */ + private String makeRenamePendingFileContents(String eTag) throws + JsonProcessingException { + + final RenamePendingJsonFormat renamePendingJsonFormat = new RenamePendingJsonFormat(); + renamePendingJsonFormat.setOldFolderName(src.toUri().getPath()); + renamePendingJsonFormat.setNewFolderName(dst.toUri().getPath()); + renamePendingJsonFormat.setETag(eTag); + + return objectMapper.writeValueAsString(renamePendingJsonFormat); + } + + /** + * This is an exact copy of org.codehaus.jettison.json.JSONObject.quote + * method. + * + * Produce a string in double quotes with backslash sequences in all the + * right places. 
A backslash will be inserted within getFileSystem().getAbfsClient() .renamePath("testABC/test.xt", "testABC/abc.txt", null, - getTestTracingContext(getFileSystem(), false), null, false, isHNSEnabled)); + getTestTracingContext(getFileSystem(), false), null, false, isHNSEnabled + )); } @Test diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java index 885f098153367..1439f8857b253 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java @@ -19,25 +19,35 @@ package org.apache.hadoop.fs.azurebfs; import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.file.AccessDeniedException; import java.util.ArrayList; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import org.assertj.core.api.Assertions; +import org.assertj.core.api.Assumptions; +import org.junit.Assert; import org.junit.Assume; import org.junit.Test; +import org.mockito.Mock; import org.mockito.Mockito; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; +import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; import org.apache.hadoop.fs.azurebfs.contracts.services.StorageErrorResponseSchema; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil; +import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; @@ -45,14 +55,16 @@ import org.apache.hadoop.fs.azurebfs.utils.TestMockHelpers; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; -import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.test.LambdaTestUtils; import static java.net.HttpURLConnection.HTTP_BAD_REQUEST; +import static java.net.HttpURLConnection.HTTP_FORBIDDEN; import static java.net.HttpURLConnection.HTTP_NOT_FOUND; import static java.net.HttpURLConnection.HTTP_OK; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.doCallRealMethod; import static org.mockito.Mockito.doReturn; @@ -161,7 +173,7 @@ public Void call() throws Exception { fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), fs.getFileSystemId(), FSOperationType.DELETE, false, 0)); // first try a non-recursive delete, expect failure - intercept(FileAlreadyExistsException.class, + intercept(IOException.class, () -> fs.delete(dir, false)); fs.registerListener(null); assertDeleted(fs, dir, true); @@ 
-227,6 +239,15 @@ public void testDeleteIdempotency() throws Exception {
   public void testDeleteIdempotencyTriggerHttp404() throws Exception {
 
     final AzureBlobFileSystem fs = getFileSystem();
+
+    /*
+     * A delete call for a folder on the DFS endpoint is a single server call,
+     * and the server orchestrates the deletion of the paths inside the
+     * directory. For the Blob endpoint, the orchestration is done by the
+     * client, so the idempotency issue does not arise there.
+     */
+    Assumptions.assumeThat(fs.getAbfsClient())
+        .isInstanceOf(AbfsDfsClient.class);
     AbfsClient client = ITestAbfsClient.createTestClientFromCurrentContext(
         fs.getAbfsStore().getClient(),
         this.getConfiguration());
@@ -274,6 +295,22 @@ public void testDeleteIdempotencyTriggerHttp404() throws Exception {
     doReturn(idempotencyRetOp).when(mockClient).deleteIdempotencyCheckOp(any());
     TracingContext tracingContext = getTestTracingContext(fs, false);
     doReturn(tracingContext).when(idempotencyRetOp).createNewTracingContext(any());
+    if (mockClient instanceof AbfsBlobClient) {
+      doCallRealMethod().when((AbfsBlobClient) mockClient)
+          .getBlobDeleteHandler(Mockito.nullable(String.class),
+              Mockito.anyBoolean(), Mockito.nullable(TracingContext.class));
+      doCallRealMethod().when(mockClient)
+          .listPath(Mockito.nullable(String.class), Mockito.anyBoolean(),
+              Mockito.anyInt(), Mockito.nullable(String.class),
+              Mockito.nullable(TracingContext.class));
+      doCallRealMethod().when((AbfsBlobClient) mockClient)
+          .listPath(Mockito.nullable(String.class), Mockito.anyBoolean(),
+              Mockito.anyInt(), Mockito.nullable(String.class),
+              Mockito.nullable(TracingContext.class),
+              Mockito.anyBoolean());
+      doCallRealMethod().when((AbfsBlobClient) mockClient).getPathStatus(Mockito.nullable(String.class), Mockito.nullable(TracingContext.class), Mockito.nullable(
+          ContextEncryptionAdapter.class), Mockito.anyBoolean());
+    }
     when(mockClient.deletePath("/NonExistingPath", false, null, tracingContext,
         fs.getIsNamespaceEnabled(tracingContext)))
         .thenCallRealMethod();
@@ -314,4 +351,277 @@ public void deleteBlobDirParallelThreadToDeleteOnDifferentTracingContext()
     fs.delete(new Path("/testDir"), true);
     fs.close();
   }
+
+  private void assumeBlobClient() throws IOException {
+    Assumptions.assumeThat(getFileSystem().getAbfsClient())
+        .isInstanceOf(AbfsBlobClient.class);
+  }
+
+  /**
+   * Assert that deleting an implicit directory deletes all the children of
+   * the folder.
+   */
+  @Test
+  public void testDeleteImplicitDir() throws Exception {
+    AzureBlobFileSystem fs = getFileSystem();
+    assumeBlobClient();
+    fs.mkdirs(new Path("/testDir/dir1"));
+    fs.create(new Path("/testDir/dir1/file1"));
+    AbfsBlobClient client = (AbfsBlobClient) fs.getAbfsClient();
+    client.deleteBlobPath(new Path("/testDir/dir1"),
+        null, getTestTracingContext(fs, true));
+    fs.delete(new Path("/testDir/dir1"), true);
+
+    Assertions.assertThat(!fs.exists(new Path("/testDir/dir1")))
+        .describedAs("FileStatus of the deleted directory should not exist")
+        .isTrue();
+    Assertions.assertThat(!fs.exists(new Path("/testDir/dir1/file1")))
+        .describedAs("Child of a deleted directory should not be present")
+        .isTrue();
+  }
+
+  /**
+   * Assert deletion of an implicit directory for which a paginated list is
+   * required.
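+   * The spied client returns one entry per listPath() page, forcing the
+   * continuation-token path through the delete orchestration.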
+ */ + @Test + public void testDeleteImplicitDirWithSingleListResults() throws Exception { + AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance( + getRawConfiguration()); + assumeBlobClient(); + AbfsBlobClient client = (AbfsBlobClient) fs.getAbfsClient(); + AbfsBlobClient spiedClient = Mockito.spy(client); + fs.getAbfsStore().setClient(spiedClient); + fs.mkdirs(new Path("/testDir/dir1")); + for (int i = 0; i < 10; i++) { + fs.create(new Path("/testDir/dir1/file" + i)); + } + + Mockito.doAnswer(answer -> { + String path = answer.getArgument(0); + boolean recursive = answer.getArgument(1); + String continuation = answer.getArgument(3); + TracingContext context = answer.getArgument(4); + + return client.listPath(path, recursive, 1, continuation, context); + }) + .when(spiedClient) + .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), + Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + + client.deleteBlobPath(new Path("/testDir/dir1"), + null, getTestTracingContext(fs, true)); + + fs.delete(new Path("/testDir/dir1"), true); + + Assertions.assertThat(fs.exists(new Path("/testDir/dir1"))) + .describedAs("FileStatus of the deleted directory should not exist") + .isFalse(); + } + + /** + * Assert deleting of the only child of an implicit directory ensures that the + * parent directory's marker is present. + */ + @Test + public void testDeleteExplicitDirInImplicitParentDir() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + assumeBlobClient(); + AbfsBlobClient client = (AbfsBlobClient) fs.getAbfsClient(); + fs.mkdirs(new Path("/testDir/dir1")); + fs.create(new Path("/testDir/dir1/file1")); + client.deleteBlobPath(new Path("/testDir/"), + null, getTestTracingContext(fs, true)); + + fs.delete(new Path("/testDir/dir1"), true); + + Assertions.assertThat(fs.exists(new Path("/testDir/dir1"))) + .describedAs("Deleted directory should not exist") + .isFalse(); + Assertions.assertThat(fs.exists(new Path("/testDir/dir1/file1"))) + .describedAs("Child of a deleted directory should not be present") + .isFalse(); + Assertions.assertThat(fs.exists(new Path("/testDir"))) + .describedAs("Parent Implicit directory should exist") + .isTrue(); + } + + @Test + public void testDeleteParallelBlobFailure() throws Exception { + AzureBlobFileSystem fs = Mockito.spy(getFileSystem()); + assumeBlobClient(); + AbfsBlobClient client = Mockito.spy((AbfsBlobClient) fs.getAbfsClient()); + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + store.setClient(client); + Mockito.doReturn(store).when(fs).getAbfsStore(); + + fs.mkdirs(new Path("/testDir")); + fs.create(new Path("/testDir/file1")); + fs.create(new Path("/testDir/file2")); + fs.create(new Path("/testDir/file3")); + + Mockito.doThrow( + new AbfsRestOperationException(HTTP_FORBIDDEN, "", "", new Exception())) + .when(client) + .deleteBlobPath(Mockito.any(Path.class), Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + + LambdaTestUtils.intercept( + AccessDeniedException.class, + () -> { + fs.delete(new Path("/testDir"), true); + }); + } + + @Test + public void testDeleteRootWithNonRecursion() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + fs.mkdirs(new Path("/testDir")); + Assertions.assertThat(fs.delete(new Path(ROOT_PATH), false)).isFalse(); + } + + /** + * Assert that delete operation failure should stop List producer. 
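+   * The stubbed client fails every deleteBlobPath with HTTP 403; once that
+   * failure surfaces through the consumer, the producer must issue no
+   * further listPath calls (verified below as exactly one listPath
+   * invocation). An illustrative form of the contract, with names assumed
+   * rather than taken from this patch:
+   * <pre>
+   *   while (producer.hasNext() &amp;&amp; !consumerFailed.get()) {
+   *     queue.offer(producer.nextPage()); // next listPath(continuation)
+   *   }
+   * </pre>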
+ */ + @Test + public void testProducerStopOnDeleteFailure() throws Exception { + assumeBlobClient(); + Configuration configuration = Mockito.spy(getRawConfiguration()); + AzureBlobFileSystem fs = Mockito.spy( + (AzureBlobFileSystem) FileSystem.get(configuration)); + + fs.mkdirs(new Path("/src")); + ExecutorService executorService = Executors.newFixedThreadPool(10); + List futureList = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + int iter = i; + Future future = executorService.submit(() -> { + try { + fs.create(new Path("/src/file" + iter)); + } catch (IOException ex) {} + }); + futureList.add(future); + } + + for (Future future : futureList) { + future.get(); + } + + AbfsBlobClient client = (AbfsBlobClient) fs.getAbfsClient(); + AbfsBlobClient spiedClient = Mockito.spy(client); + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + store.setClient(spiedClient); + Mockito.doReturn(store).when(fs).getAbfsStore(); + + final int[] deleteCallInvocation = new int[1]; + deleteCallInvocation[0] = 0; + Mockito.doAnswer(answer -> { + throw new AbfsRestOperationException(HTTP_FORBIDDEN, "", "", + new Exception()); + }).when(spiedClient) + .deleteBlobPath(Mockito.any(Path.class), Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + + AbfsClientTestUtil.mockGetDeleteBlobHandler(spiedClient, + (blobDeleteHandler) -> { + Mockito.doAnswer(answer -> { + try { + answer.callRealMethod(); + } catch (AbfsRestOperationException ex) { + if (ex.getStatusCode() == HTTP_FORBIDDEN) { + deleteCallInvocation[0]++; + } + throw ex; + } + throw new AssertionError("List Consumption should have failed"); + }) + .when(blobDeleteHandler).listRecursiveAndTakeAction(); + return null; + }); + + final int[] listCallInvocation = new int[1]; + listCallInvocation[0] = 0; + Mockito.doAnswer(answer -> { + if (listCallInvocation[0] == 1) { + while (deleteCallInvocation[0] == 0) ; + } + listCallInvocation[0]++; + return answer.callRealMethod(); + }) + .when(spiedClient) + .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), + Mockito.nullable(String.class), Mockito.any(TracingContext.class)); + + intercept(AccessDeniedException.class, + () -> { + fs.delete(new Path("/src"), true); + }); + + Mockito.verify(spiedClient, Mockito.times(1)) + .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), + Mockito.nullable(String.class), Mockito.any(TracingContext.class)); + } + + /** + * Test to assert that the CID in src marker delete contains the + * total number of blobs operated in the delete directory. + * Also, to assert that all operations in the delete-directory flow have same + * primaryId and opType. 
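+   * In this test the directory holds ten files plus the directory marker
+   * blob itself, so the client request id on the marker delete is expected
+   * to carry an operated-blob count of 11.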
+ */ + @Test + public void testDeleteEmitDeletionCountInClientRequestId() throws Exception { + AzureBlobFileSystem fs = Mockito.spy(getFileSystem()); + assumeBlobClient(); + AbfsBlobClient client = (AbfsBlobClient) fs.getAbfsClient(); + AbfsBlobClient spiedClient = Mockito.spy(client); + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + store.setClient(spiedClient); + Mockito.doReturn(store).when(fs).getAbfsStore(); + + String dirPathStr = "/testDir/dir1"; + fs.mkdirs(new Path(dirPathStr)); + ExecutorService executorService = Executors.newFixedThreadPool(5); + List futures = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + final int iter = i; + Future future = executorService.submit(() -> { + return fs.create(new Path("/testDir/dir1/file" + iter)); + }); + futures.add(future); + } + + for (Future future : futures) { + future.get(); + } + executorService.shutdown(); + + final TracingHeaderValidator tracingHeaderValidator + = new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.DELETE, false, 0); + fs.registerListener(tracingHeaderValidator); + + Mockito.doAnswer(answer -> { + Mockito.doAnswer(deleteAnswer -> { + if (dirPathStr.equalsIgnoreCase( + ((Path) deleteAnswer.getArgument(0)).toUri().getPath())) { + tracingHeaderValidator.setOperatedBlobCount(11); + Object result = deleteAnswer.callRealMethod(); + tracingHeaderValidator.setOperatedBlobCount(null); + return result; + } + return deleteAnswer.callRealMethod(); + }) + .when(spiedClient) + .deleteBlobPath(Mockito.any(Path.class), + Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + + return answer.callRealMethod(); + }) + .when(store) + .delete(Mockito.any(Path.class), Mockito.anyBoolean(), + Mockito.any(TracingContext.class)); + + fs.delete(new Path(dirPathStr), true); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java index 271c7f67308c3..f3c980648a243 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java @@ -40,6 +40,7 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.LambdaTestUtils; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.INFINITE_LEASE_DURATION; import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.CONDITION_NOT_MET; import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_LEASE_EXPIRED_BLOB; import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_NO_LEASE_ID_SPECIFIED_BLOB; @@ -350,7 +351,8 @@ public void testAcquireRetry() throws Exception { tracingContext.setListener(listener); AbfsLease lease = new AbfsLease(fs.getAbfsClient(), - testFilePath.toUri().getPath(), tracingContext); + testFilePath.toUri().getPath(), true, INFINITE_LEASE_DURATION, + null, tracingContext); Assert.assertNotNull("Did not successfully lease file", lease.getLeaseID()); listener.setOperation(FSOperationType.RELEASE_LEASE); lease.free(); @@ -362,19 +364,20 @@ public void testAcquireRetry() throws Exception { doThrow(new AbfsLease.LeaseException("failed to acquire 1")) .doThrow(new AbfsLease.LeaseException("failed to acquire 2")) .doCallRealMethod().when(mockClient) - 
.acquireLease(anyString(), anyInt(), any(TracingContext.class)); + .acquireLease(anyString(), anyInt(), null, any(TracingContext.class)); - lease = new AbfsLease(mockClient, testFilePath.toUri().getPath(), 5, 1, tracingContext); + lease = new AbfsLease(mockClient, testFilePath.toUri().getPath(), true, 5, 1, + INFINITE_LEASE_DURATION, null, tracingContext); Assert.assertNotNull("Acquire lease should have retried", lease.getLeaseID()); lease.free(); Assert.assertEquals("Unexpected acquire retry count", 2, lease.getAcquireRetryCount()); doThrow(new AbfsLease.LeaseException("failed to acquire")).when(mockClient) - .acquireLease(anyString(), anyInt(), any(TracingContext.class)); + .acquireLease(anyString(), anyInt(), null, any(TracingContext.class)); LambdaTestUtils.intercept(AzureBlobFileSystemException.class, () -> { - new AbfsLease(mockClient, testFilePath.toUri().getPath(), 5, 1, - tracingContext); + new AbfsLease(mockClient, testFilePath.toUri().getPath(), true, 5, 1, + INFINITE_LEASE_DURATION, null, tracingContext); }); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index d041c9a1fcd6a..b6f7c86008a73 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -171,7 +171,8 @@ public void testListPathTracingContext() throws Exception { }); List fileStatuses = new ArrayList<>(); - spiedStore.listStatus(new Path("/"), "", fileStatuses, true, null, spiedTracingContext); + spiedStore.listStatus(new Path("/"), "", fileStatuses, true, null, spiedTracingContext + ); // Assert that there were 2 paginated ListPath calls were made 1 and 2. // 1. 
Without continuation token diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index ea07650e90110..feddcd6db360b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -18,22 +18,67 @@ package org.apache.hadoop.fs.azurebfs; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.AccessDeniedException; import java.util.ArrayList; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import org.assertj.core.api.Assertions; +import org.assertj.core.api.Assumptions; import org.junit.Assert; import org.junit.Test; +import org.mockito.Mockito; +import org.mockito.stubbing.Answer; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; +import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil; +import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; +import org.apache.hadoop.fs.azurebfs.services.AbfsLease; +import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; +import org.apache.hadoop.fs.azurebfs.services.BlobRenameHandler; +import org.apache.hadoop.fs.azurebfs.services.RenameAtomicity; +import org.apache.hadoop.fs.azurebfs.services.RenameAtomicityTestUtils; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; import org.apache.hadoop.fs.statistics.IOStatisticAssertions; import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.test.LambdaTestUtils; +import org.apache.hadoop.util.functional.FunctionRaisingIOE; +import static java.net.HttpURLConnection.HTTP_CONFLICT; +import static java.net.HttpURLConnection.HTTP_FORBIDDEN; +import static java.net.HttpURLConnection.HTTP_NOT_FOUND; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.RENAME_PATH_ATTEMPTS; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COPY_STATUS_ABORTED; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COPY_STATUS_FAILED; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COPY_STATUS_PENDING; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH; +import static 
org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_LEASE_THREADS; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.COPY_BLOB_ABORTED; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.COPY_BLOB_FAILED; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND; +import static org.apache.hadoop.fs.azurebfs.services.RenameAtomicity.SUFFIX; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsFile; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathDoesNotExist; @@ -41,6 +86,7 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.assertRenameOutcome; import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** * Test rename operation. @@ -192,10 +238,1170 @@ public void testRenameWithNoDestinationParentDir() throws Exception { // Verify that metadata was in an incomplete state after the rename // failure, and we retired the rename once more. IOStatistics ioStatistics = fs.getIOStatistics(); + AbfsClient client = fs.getAbfsStore().getClient(); IOStatisticAssertions.assertThatStatisticCounter(ioStatistics, RENAME_PATH_ATTEMPTS.getStatName()) - .describedAs("There should be 2 rename attempts if metadata " - + "incomplete state failure is hit") + .describedAs("For Dfs endpoint: There should be 2 rename " + + "attempts if metadata incomplete state failure is hit." + + "For Blob endpoint: There would be only one rename attempt which " + + "would have a failed precheck.") + .isEqualTo(client instanceof AbfsDfsClient ? 2 : 1); + } + + @Test + public void testRenameToRoot() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + fs.mkdirs(new Path("/src1/src2")); + Assert.assertTrue(fs.rename(new Path("/src1/src2"), new Path("/"))); + Assert.assertTrue(fs.exists(new Path("/src2"))); + } + + @Test + public void testRenameNotFoundBlobToEmptyRoot() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + Assert.assertFalse(fs.rename(new Path("/file"), new Path("/"))); + } + + private void assumeNonHnsAccountBlobEndpoint(final AzureBlobFileSystem fs) { + Assumptions.assumeThat(fs.getAbfsStore().getClient()) + .describedAs("Client has to be of type AbfsBlobClient") + .isInstanceOf(AbfsBlobClient.class); + } + + @Test(expected = IOException.class) + public void testRenameBlobToDstWithColonInPath() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + assumeNonHnsAccountBlobEndpoint(fs); + fs.create(new Path("/src")); + fs.rename(new Path("/src"), new Path("/dst:file")); + } + + @Test + public void testRenameBlobInSameDirectoryWithNoMarker() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + assumeNonHnsAccountBlobEndpoint(fs); + AbfsBlobClient client = (AbfsBlobClient) fs.getAbfsStore().getClient(); + fs.create(new Path("/srcDir/dir/file")); + client.deleteBlobPath(new Path("/srcDir/dir"), null, + getTestTracingContext(fs, true)); + Assert.assertTrue(fs.rename(new Path("/srcDir/dir"), new Path("/srcDir"))); + } + + /** + *
+   * Test to check the behaviour of the rename API when the destination
+   * directory already exists. The HNS (DFS) call and the Blob endpoint call
+   * should behave the same.
+   *
+   * /testDir2/test1/test2/test3 contains (/file)
+   * There is another path that exists: /testDir2/test4/test3
+   * On rename(/testDir2/test1/test2/test3, /testDir2/test4).
+   *
+   * Expectation for HNS / Blob endpoint:
+   * <ol>
+   *   <li>Rename should fail.</li>
+   *   <li>No file should be transferred to destination directory.</li>
+   * </ol>
+   */
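For orientation, the Blob-endpoint rename tests in this file all exercise a client-side orchestration: the client enumerates the source tree page by page and moves it blob by blob. A minimal sketch of that flow follows; only copyBlob, deleteBlobPath and listPath are real client calls from this patch, while ListPage, listPage and rebase are assumed helper names used purely for illustration:

    // Hypothetical sketch of the Blob-endpoint directory rename flow. The
    // production logic lives in BlobRenameHandler and runs list production
    // and copy/delete consumption concurrently rather than in one loop.
    void renameDirectory(AbfsBlobClient client, Path src, Path dst,
        TracingContext tc) throws IOException {
      String token = null;
      do {
        // Paginated enumeration of the source tree (real call: listPath).
        ListPage page = listPage(client, src, token, tc);
        for (Path blob : page.paths()) {
          Path target = rebase(blob, src, dst); // swap the src prefix for dst
          client.copyBlob(blob, target, /* leaseId */ null, tc);
          client.deleteBlobPath(blob, /* leaseId */ null, tc);
        }
        token = page.continuationToken();
      } while (token != null);
    }

For atomic-rename paths under /hbase, the real flow additionally writes a "-RenamePending.json" marker and takes blob leases before copying, and a copy left in pending state is polled through the x-ms-copy-status header, which is what the copy-status tests further below simulate.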
+ */ + @Test + public void testRenamePendingJsonIsRemovedPostSuccessfulRename() + throws Exception { + final AzureBlobFileSystem fs = Mockito.spy(this.getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fs); + AbfsBlobClient client = (AbfsBlobClient) addSpyHooksOnClient(fs); + + assumeNonHnsAccountBlobEndpoint(fs); + fs.setWorkingDirectory(new Path("/")); + fs.mkdirs(new Path("hbase/test1/test2/test3")); + fs.create(new Path("hbase/test1/test2/test3/file")); + fs.create(new Path("hbase/test1/test2/test3/file1")); + fs.mkdirs(new Path("hbase/test4/")); + fs.create(new Path("hbase/test4/file1")); + final Integer[] correctDeletePathCount = new Integer[1]; + correctDeletePathCount[0] = 0; + + Mockito.doAnswer(answer -> { + final String correctDeletePath = "/hbase/test1/test2/test3" + SUFFIX; + if (correctDeletePath.equals( + ((Path) answer.getArgument(0)).toUri().getPath())) { + correctDeletePathCount[0] = 1; + } + return null; + }) + .when(client) + .deleteBlobPath(Mockito.any(Path.class), Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + assertTrue(fs.rename(new Path("hbase/test1/test2/test3"), + new Path("hbase/test4"))); + assertTrue("RenamePendingJson should be deleted", + correctDeletePathCount[0] == 1); + } + + private AbfsClient addSpyHooksOnClient(final AzureBlobFileSystem fs) { + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + Mockito.doReturn(store).when(fs).getAbfsStore(); + AbfsClient client = Mockito.spy(store.getClient()); + Mockito.doReturn(client).when(store).getClient(); + return client; + } + + /** + * Test for a directory in /hbase directory. To simulate the crash of process, + * test will throw an exception with 403 on a copy of one of the blob.
+ * ListStatus API will be called on the directory. Expectation is that the ListStatus + * API of {@link AzureBlobFileSystem} should recover the paused rename. + */ + @Test + public void testHBaseHandlingForFailedRenameWithListRecovery() + throws Exception { + AzureBlobFileSystem fs = Mockito.spy(this.getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fs); + AbfsBlobClient client = (AbfsBlobClient) addSpyHooksOnClient(fs); + + String srcPath = "hbase/test1/test2"; + final String failedCopyPath = srcPath + "/test3/file1"; + fs.setWorkingDirectory(new Path("/")); + fs.mkdirs(new Path(srcPath)); + fs.mkdirs(new Path(srcPath, "test3")); + fs.create(new Path(srcPath + "/test3/file")); + fs.create(new Path(failedCopyPath)); + fs.mkdirs(new Path("hbase/test4/")); + fs.create(new Path("hbase/test4/file1")); + + crashRenameAndRecover(fs, client, srcPath, (abfsFs) -> { + abfsFs.listStatus(new Path(srcPath).getParent()); + return null; + }); + } + + /** + * Test for a directory in /hbase directory. To simulate the crash of process, + * test will throw an exception with 403 on a copy of one of the blob. The + * source directory is a nested directory.
+ * GetFileStatus API will be called on the directory. Expectation is that the + * GetFileStatus API of {@link AzureBlobFileSystem} should recover the paused + * rename. + */ + @Test + public void testHBaseHandlingForFailedRenameWithGetFileStatusRecovery() + throws Exception { + AzureBlobFileSystem fs = Mockito.spy(this.getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fs); + AbfsBlobClient client = (AbfsBlobClient) addSpyHooksOnClient(fs); + + String srcPath = "hbase/test1/test2"; + final String failedCopyPath = srcPath + "/test3/file1"; + fs.setWorkingDirectory(new Path("/")); + fs.mkdirs(new Path(srcPath)); + fs.mkdirs(new Path(srcPath, "test3")); + fs.create(new Path(srcPath + "/test3/file")); + fs.create(new Path(failedCopyPath)); + fs.mkdirs(new Path("hbase/test4/")); + fs.create(new Path("hbase/test4/file1")); + + crashRenameAndRecover(fs, client, srcPath, (abfsFs) -> { + abfsFs.exists(new Path(srcPath)); + return null; + }); + } + + private void crashRenameAndRecover(final AzureBlobFileSystem fs, + AbfsBlobClient client, + final String srcPath, + final FunctionRaisingIOE recoveryCallable) + throws Exception { + crashRename(fs, client, srcPath); + + AzureBlobFileSystem fs2 = Mockito.spy(getFileSystem()); + fs2.setWorkingDirectory(new Path(ROOT_PATH)); + client = (AbfsBlobClient) addSpyHooksOnClient(fs2); + int[] renameJsonDeleteCounter = new int[1]; + renameJsonDeleteCounter[0] = 0; + Mockito.doAnswer(answer -> { + if ((ROOT_PATH + srcPath + SUFFIX) + .equalsIgnoreCase(((Path) answer.getArgument(0)).toUri().getPath())) { + renameJsonDeleteCounter[0] = 1; + } + return answer.callRealMethod(); + }) + .when(client) + .deleteBlobPath(Mockito.any(Path.class), Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + + recoveryCallable.apply(fs2); + Assertions.assertThat(renameJsonDeleteCounter[0]) + .describedAs("RenamePendingJson should be deleted") + .isEqualTo(1); + + //List would complete the rename orchestration. 
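+    // On resume, the listing finds the "<src>-RenamePending.json" marker
+    // blob, replays the pending copy + delete steps through RenameAtomicity,
+    // and then deletes the marker; the assertions below check the moved tree.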
+ assertFalse(fs2.exists(new Path("hbase/test1/test2"))); + assertFalse(fs2.exists(new Path("hbase/test1/test2/test3"))); + assertTrue(fs2.exists(new Path("hbase/test4/test2/test3"))); + assertFalse(fs2.exists(new Path("hbase/test1/test2/test3/file"))); + assertTrue(fs2.exists(new Path("hbase/test4/test2/test3/file"))); + assertFalse(fs2.exists(new Path("hbase/test1/test2/test3/file1"))); + assertTrue(fs2.exists(new Path("hbase/test4/test2/test3/file1"))); + } + + private void crashRename(final AzureBlobFileSystem fs, + final AbfsBlobClient client, + final String srcPath) throws Exception { + BlobRenameHandler[] blobRenameHandlers = new BlobRenameHandler[1]; + AbfsClientTestUtil.mockGetRenameBlobHandler(client, + blobRenameHandler -> { + blobRenameHandlers[0] = blobRenameHandler; + return null; + }); + + //Fail rename orchestration on path hbase/test1/test2/test3/file1 + Mockito.doThrow(new AbfsRestOperationException(HTTP_FORBIDDEN, "", "", + new Exception())) + .when(client) + .copyBlob(Mockito.any(Path.class), Mockito.any(Path.class), + Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + + LambdaTestUtils.intercept(AccessDeniedException.class, () -> { + fs.rename(new Path(srcPath), + new Path("hbase/test4")); + }); + + //Release all the leases taken by atomic rename orchestration + List leases = new ArrayList<>(blobRenameHandlers[0].getLeases()); + for (AbfsLease lease : leases) { + lease.free(); + } + } + + /** + * Simulates a scenario where HMaster in Hbase starts up and executes listStatus + * API on the directory that has to be renamed by some other executor-machine. + * The scenario is that RenamePending JSON is created but before it could be + * appended, it has been opened by the HMaster. The HMaster will delete it. The + * machine doing rename would have to recreate the JSON file. + * ref: issue + */ + @Test + public void testHbaseListStatusBeforeRenamePendingFileAppendedWithIngressOnBlob() + throws Exception { + final AzureBlobFileSystem fs = Mockito.spy(this.getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fs); + fs.setWorkingDirectory(new Path(ROOT_PATH)); + testRenamePreRenameFailureResolution(fs); + testAtomicityRedoInvalidFile(fs); + } + + private void testRenamePreRenameFailureResolution(final AzureBlobFileSystem fs) + throws Exception { + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + AbfsBlobClient client = (AbfsBlobClient) addSpyHooksOnClient(fs); + + Path src = new Path("hbase/test1/test2"); + Path dest = new Path("hbase/test4"); + fs.mkdirs(src); + fs.mkdirs(new Path(src, "test3")); + + final int[] renamePendingJsonWriteCounter = new int[1]; + + /* + * Fail the creation of RenamePendingJson file on the first attempt. 
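+     * The spied flush deletes the half-written JSON on its first write, so
+     * the rename flow must detect the missing file and write it again
+     * (asserted below as two creation attempts).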
+ */ + Answer renamePendingJsonCreateAns = createAnswer -> { + Path path = createAnswer.getArgument(0); + Mockito.doAnswer(clientFlushAns -> { + if (renamePendingJsonWriteCounter[0]++ == 0) { + fs.delete(path, true); + } + return clientFlushAns.callRealMethod(); + }) + .when(client) + .flush(Mockito.any(byte[].class), Mockito.anyString(), + Mockito.anyBoolean(), Mockito.nullable(String.class), + Mockito.nullable(String.class), Mockito.anyString(), + Mockito.nullable(ContextEncryptionAdapter.class), + Mockito.any(TracingContext.class)); + return createAnswer.callRealMethod(); + }; + + RenameAtomicityTestUtils.addCreatePathMock(client, + renamePendingJsonCreateAns); + fs.rename(src, dest); + + Assertions.assertThat(renamePendingJsonWriteCounter[0]) + .describedAs("Creation of RenamePendingJson should be attempted twice") .isEqualTo(2); } + + private void testAtomicityRedoInvalidFile(final AzureBlobFileSystem fs) + throws Exception { + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + AbfsBlobClient client = (AbfsBlobClient) addSpyHooksOnClient(fs); + + Path path = new Path("/hbase/test1/test2"); + fs.mkdirs(new Path(path, "test3")); + Path renameJson = new Path(path.getParent(), path.getName() + SUFFIX); + OutputStream os = fs.create(renameJson); + os.write("{".getBytes(StandardCharsets.UTF_8)); + os.close(); + + int[] renameJsonDeleteCounter = new int[1]; + renameJsonDeleteCounter[0] = 0; + Mockito.doAnswer(deleteAnswer -> { + Path ansPath = deleteAnswer.getArgument(0); + if (renameJson.toUri() + .getPath() + .equalsIgnoreCase(ansPath.toUri().getPath())) { + renameJsonDeleteCounter[0]++; + } + return deleteAnswer.callRealMethod(); + }) + .when(client) + .deleteBlobPath(Mockito.any(Path.class), Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + + new RenameAtomicity(renameJson, 1, + getTestTracingContext(fs, true), null, client).redo(); + + Assertions.assertThat(renameJsonDeleteCounter[0]) + .describedAs("RenamePendingJson should be deleted") + .isEqualTo(1); + Mockito.verify(client, Mockito.times(0)).copyBlob(Mockito.any(Path.class), + Mockito.any(Path.class), Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + } + + @Test + public void testRenameJsonDeletedBeforeRenameAtomicityCanDelete() + throws Exception { + final AzureBlobFileSystem fs = Mockito.spy(this.getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fs); + AbfsBlobClient client = (AbfsBlobClient) addSpyHooksOnClient(fs); + + fs.setWorkingDirectory(new Path(ROOT_PATH)); + + Path path = new Path("/hbase/test1/test2"); + fs.mkdirs(new Path(path, "test3")); + Path renameJson = new Path(path.getParent(), path.getName() + SUFFIX); + OutputStream os = fs.create(renameJson); + os.write("{}".getBytes(StandardCharsets.UTF_8)); + os.close(); + + int[] renameJsonDeleteCounter = new int[1]; + renameJsonDeleteCounter[0] = 0; + Mockito.doAnswer(deleteAnswer -> { + Path ansPath = deleteAnswer.getArgument(0); + if (renameJson.toUri() + .getPath() + .equalsIgnoreCase(ansPath.toUri().getPath())) { + renameJsonDeleteCounter[0]++; + } + getFileSystem().delete(ansPath, true); + return deleteAnswer.callRealMethod(); + }) + .when(client) + .deleteBlobPath(Mockito.any(Path.class), Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + + new RenameAtomicity(renameJson, 2, + getTestTracingContext(fs, true), null, client); + } + + @Test + public void testRenameCompleteBeforeRenameAtomicityRedo() throws Exception { + final AzureBlobFileSystem fs = Mockito.spy(this.getFileSystem()); + 
assumeNonHnsAccountBlobEndpoint(fs); + AbfsBlobClient client = (AbfsBlobClient) addSpyHooksOnClient(fs); + + fs.setWorkingDirectory(new Path(ROOT_PATH)); + + Path path = new Path("/hbase/test1/test2"); + fs.mkdirs(new Path(path, "test3")); + Path renameJson = new Path(path.getParent(), path.getName() + SUFFIX); + + /* + * Create renameJson file. + */ + AzureBlobFileSystemStore.VersionedFileStatus fileStatus + = (AzureBlobFileSystemStore.VersionedFileStatus) fs.getFileStatus(path); + int jsonLen = new RenameAtomicity(path, + new Path("/hbase/test4"), renameJson, + getTestTracingContext(fs, true), fileStatus.getEtag(), client).preRename(); + + RenameAtomicity redoRenameAtomicity = Mockito.spy( + new RenameAtomicity(renameJson, jsonLen, + getTestTracingContext(fs, true), null, client)); + RenameAtomicityTestUtils.addReadPathMock(redoRenameAtomicity, + readCallbackAnswer -> { + byte[] bytes = (byte[]) readCallbackAnswer.callRealMethod(); + fs.delete(path, true); + return bytes; + }); + AbfsRestOperationException ex = intercept(AbfsRestOperationException.class, + () -> { + redoRenameAtomicity.redo(); + }); + Assertions.assertThat(ex.getStatusCode()) + .describedAs("RenameAtomicity redo should fail with 404") + .isEqualTo(SOURCE_PATH_NOT_FOUND.getStatusCode()); + Assertions.assertThat(ex.getErrorCode()) + .describedAs("RenameAtomicity redo should fail with 404") + .isEqualTo(SOURCE_PATH_NOT_FOUND); + } + + @Test + public void testCopyBlobIdempotency() throws Exception { + final AzureBlobFileSystem fs = Mockito.spy(this.getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fs); + AbfsBlobClient client = (AbfsBlobClient) addSpyHooksOnClient(fs); + + fs.setWorkingDirectory(new Path(ROOT_PATH)); + Path src = new Path("/srcDir/src"); + Path dst = new Path("/dst"); + fs.create(src); + + Mockito.doAnswer(answer -> { + Path srcCopy = answer.getArgument(0); + Path dstCopy = answer.getArgument(1); + String leaseId = answer.getArgument(2); + TracingContext tracingContext = answer.getArgument(3); + /* + * To fail copyBlob with idempotency issue, making a copy of the source to destination + * before the invoked copy + */ + ((AbfsBlobClient) getFileSystem().getAbfsClient()).copyBlob(srcCopy, + dstCopy, leaseId, tracingContext); + return answer.callRealMethod(); + }).when(client).copyBlob(Mockito.any(Path.class), Mockito.any(Path.class), + Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + + Assertions.assertThat(fs.rename(src, dst)) + .describedAs("Rename should be successful and copyBlob should" + + "be able to handle idempotency issue") + .isTrue(); + + Assertions.assertThat(fs.exists(src)) + .describedAs("Source should not exist after rename") + .isFalse(); + + Assertions.assertThat(fs.exists(dst)) + .describedAs("Destination should exist after rename") + .isTrue(); + } + + @Test + public void testRenameBlobIdempotencyWhereDstIsCreatedFromSomeOtherProcess() + throws IOException { + final AzureBlobFileSystem fs = Mockito.spy(this.getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fs); + AbfsBlobClient client = (AbfsBlobClient) addSpyHooksOnClient(fs); + + fs.setWorkingDirectory(new Path(ROOT_PATH)); + Path src = new Path("/src"); + Path dst = new Path("/dst"); + fs.create(src); + + Mockito.doAnswer(answer -> { + Path dstCopy = answer.getArgument(1); + fs.create(dstCopy); + return answer.callRealMethod(); + }).when(client).copyBlob(Mockito.any(Path.class), Mockito.any(Path.class), + Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + + 
Assertions.assertThat(fs.rename(src, dst)) + .describedAs("Rename should be successful and copyBlob should" + + "be able to handle idempotency issue") + .isFalse(); + + Assertions.assertThat(fs.exists(src)) + .describedAs("Source should exist after rename failure") + .isTrue(); + } + + @Test + public void testRenameDirWhenMarkerBlobIsAbsentOnDstDir() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + assumeNonHnsAccountBlobEndpoint(fs); + fs.mkdirs(new Path("/test1")); + fs.mkdirs(new Path("/test1/test2")); + fs.mkdirs(new Path("/test1/test2/test3")); + fs.create(new Path("/test1/test2/test3/file")); + + ((AbfsBlobClient) fs.getAbfsClient()) + .deleteBlobPath(new Path("/test1/test2"), + null, getTestTracingContext(fs, true)); + fs.mkdirs(new Path("/test4/test5")); + fs.rename(new Path("/test4"), new Path("/test1/test2")); + + assertTrue(fs.exists(new Path("/test1/test2/test4/test5"))); + } + + @Test + public void testBlobRenameSrcDirHasNoMarker() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + assumeNonHnsAccountBlobEndpoint(fs); + fs.create(new Path("/test1/test2/file1")); + ((AbfsBlobClient) fs.getAbfsStore().getClient()) + .deleteBlobPath(new Path("/test1"), null, + getTestTracingContext(fs, true)); + fs.mkdirs(new Path("/test2")); + fs.rename(new Path("/test1"), new Path("/test2")); + assertTrue(fs.exists(new Path("/test2/test1"))); + } + + private void addMockForProgressStatusOnCopyOperation(final AbfsBlobClient spiedClient) + throws AzureBlobFileSystemException { + Mockito.doAnswer(answer -> { + AbfsRestOperation op = Mockito.spy( + (AbfsRestOperation) answer.callRealMethod()); + AbfsHttpOperation httpOp = Mockito.spy(op.getResult()); + Mockito.doReturn(COPY_STATUS_PENDING).when(httpOp).getResponseHeader( + HttpHeaderConfigurations.X_MS_COPY_STATUS); + Mockito.doReturn(httpOp).when(op).getResult(); + return op; + }) + .when(spiedClient) + .copyBlob(Mockito.any(Path.class), Mockito.any(Path.class), + Mockito.nullable(String.class), Mockito.any(TracingContext.class)); + } + + @Test + public void testCopyBlobTakeTime() throws Exception { + AzureBlobFileSystem fileSystem = Mockito.spy(getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fileSystem); + AbfsBlobClient spiedClient = (AbfsBlobClient) addSpyHooksOnClient( + fileSystem); + + addMockForProgressStatusOnCopyOperation(spiedClient); + fileSystem.create(new Path("/test1/file")); + + BlobRenameHandler[] blobRenameHandlers = new BlobRenameHandler[1]; + AbfsClientTestUtil.mockGetRenameBlobHandler(spiedClient, + blobRenameHandler -> { + blobRenameHandlers[0] = blobRenameHandler; + return null; + }); + + fileSystem.rename(new Path("/test1/file"), new Path("/test1/file2")); + Assert.assertTrue(fileSystem.exists(new Path("/test1/file2"))); + + Mockito.verify(blobRenameHandlers[0], Mockito.times(1)) + .handleCopyInProgress(Mockito.any(Path.class), + Mockito.any(TracingContext.class), Mockito.any(String.class)); + } + + private void addMockForCopyOperationFinalStatus(final AbfsBlobClient spiedClient, + final String requiredCopyFinalStatus) { + AbfsClientTestUtil.mockGetRenameBlobHandler(spiedClient, + blobRenameHandler -> { + + Mockito.doAnswer(onHandleCopyInProgress -> { + Path handlePath = onHandleCopyInProgress.getArgument(0); + TracingContext tracingContext = onHandleCopyInProgress.getArgument( + 1); + Mockito.doAnswer(onStatusCheck -> { + AbfsRestOperation op = Mockito.spy( + (AbfsRestOperation) onStatusCheck.callRealMethod()); + AbfsHttpOperation httpOp = Mockito.spy(op.getResult()); + 
Mockito.doReturn(requiredCopyFinalStatus) + .when(httpOp) + .getResponseHeader( + HttpHeaderConfigurations.X_MS_COPY_STATUS); + Mockito.doReturn(httpOp).when(op).getResult(); + return op; + }) + .when(spiedClient) + .getPathStatus(handlePath.toUri().getPath(), + tracingContext, null, false); + return onHandleCopyInProgress.callRealMethod(); + }) + .when(blobRenameHandler) + .handleCopyInProgress(Mockito.any(Path.class), + Mockito.any(TracingContext.class), Mockito.any(String.class)); + return null; + }); + } + + @Test + public void testCopyBlobTakeTimeAndEventuallyFail() throws Exception { + AzureBlobFileSystem fileSystem = Mockito.spy(getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fileSystem); + AbfsBlobClient spiedClient = (AbfsBlobClient) addSpyHooksOnClient( + fileSystem); + + addMockForProgressStatusOnCopyOperation(spiedClient); + fileSystem.create(new Path("/test1/file")); + + final String requiredCopyFinalStatus = COPY_STATUS_FAILED; + addMockForCopyOperationFinalStatus(spiedClient, requiredCopyFinalStatus); + + AbfsRestOperationException ex = intercept(AbfsRestOperationException.class, + () -> { + fileSystem.rename(new Path("/test1/file"), new Path("/test1/file2")); + }); + Assertions.assertThat(ex.getStatusCode()) + .describedAs("Expecting COPY_FAILED status code") + .isEqualTo(COPY_BLOB_FAILED.getStatusCode()); + Assertions.assertThat(ex.getErrorCode()) + .describedAs("Expecting COPY_FAILED error code") + .isEqualTo(COPY_BLOB_FAILED); + } + + @Test + public void testCopyBlobTakeTimeAndEventuallyAborted() throws Exception { + AzureBlobFileSystem fileSystem = Mockito.spy(getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fileSystem); + AbfsBlobClient spiedClient = (AbfsBlobClient) addSpyHooksOnClient( + fileSystem); + + addMockForProgressStatusOnCopyOperation(spiedClient); + fileSystem.create(new Path("/test1/file")); + + final String requiredCopyFinalStatus = COPY_STATUS_ABORTED; + addMockForCopyOperationFinalStatus(spiedClient, requiredCopyFinalStatus); + + AbfsRestOperationException ex = intercept(AbfsRestOperationException.class, + () -> { + fileSystem.rename(new Path("/test1/file"), new Path("/test1/file2")); + }); + Assertions.assertThat(ex.getStatusCode()) + .describedAs("Expecting COPY_ABORTED status code") + .isEqualTo(COPY_BLOB_ABORTED.getStatusCode()); + Assertions.assertThat(ex.getErrorCode()) + .describedAs("Expecting COPY_ABORTED error code") + .isEqualTo(COPY_BLOB_ABORTED); + } + + @Test + public void testCopyBlobTakeTimeAndBlobIsDeleted() throws Exception { + AzureBlobFileSystem fileSystem = Mockito.spy(getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fileSystem); + AbfsBlobClient spiedClient = (AbfsBlobClient) addSpyHooksOnClient( + fileSystem); + String srcFile = "/test1/file"; + String dstFile = "/test1/file2"; + + Mockito.doAnswer(answer -> { + AbfsRestOperation op = Mockito.spy( + (AbfsRestOperation) answer.callRealMethod()); + fileSystem.delete(new Path(dstFile), false); + AbfsHttpOperation httpOp = Mockito.spy(op.getResult()); + Mockito.doReturn(COPY_STATUS_PENDING).when(httpOp).getResponseHeader( + HttpHeaderConfigurations.X_MS_COPY_STATUS); + Mockito.doReturn(httpOp).when(op).getResult(); + return op; + }) + .when(spiedClient) + .copyBlob(Mockito.any(Path.class), Mockito.any(Path.class), + Mockito.nullable(String.class), Mockito.any(TracingContext.class)); + + fileSystem.create(new Path(srcFile)); + + assertFalse(fileSystem.rename(new Path(srcFile), new Path(dstFile))); + assertFalse(fileSystem.exists(new Path(dstFile))); + } + + @Test + public 
void testCopyAfterSourceHasBeenDeleted() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + assumeNonHnsAccountBlobEndpoint(fs); + AbfsBlobClient client = (AbfsBlobClient) fs.getAbfsClient(); + fs.create(new Path("/src")); + TracingContext tracingContext = new TracingContext("clientCorrelationId", + "fileSystemId", FSOperationType.TEST_OP, + getConfiguration().getTracingHeaderFormat(), + null); + client.deleteBlobPath(new Path("/src"), null, + getTestTracingContext(fs, true)); + Boolean srcBlobNotFoundExReceived = false; + + AbfsRestOperationException ex = intercept(AbfsRestOperationException.class, + () -> { + client.copyBlob(new Path("/src"), new Path("/dst"), + null, getTestTracingContext(fs, true)); + }); + Assertions.assertThat(ex.getStatusCode()) + .describedAs("Source has to be not found at copy") + .isEqualTo(HTTP_NOT_FOUND); + } + + @Test + public void testParallelRenameForAtomicRenameShouldFail() throws Exception { + Configuration config = getRawConfiguration(); + config.set(FS_AZURE_LEASE_THREADS, "2"); + AzureBlobFileSystem fs = Mockito.spy( + (AzureBlobFileSystem) FileSystem.newInstance(config)); + assumeNonHnsAccountBlobEndpoint(fs); + fs.setWorkingDirectory(new Path(ROOT_PATH)); + AbfsBlobClient client = (AbfsBlobClient) addSpyHooksOnClient(fs); + + Path src = new Path("/hbase/src"); + Path dst = new Path("/hbase/dst"); + fs.mkdirs(src); + + AtomicBoolean leaseAcquired = new AtomicBoolean(false); + AtomicBoolean exceptionOnParallelRename = new AtomicBoolean(false); + AtomicBoolean parallelThreadDone = new AtomicBoolean(false); + Mockito.doAnswer(answer -> { + AbfsRestOperation op = (AbfsRestOperation) answer.callRealMethod(); + leaseAcquired.set(true); + while (!parallelThreadDone.get()) ; + return op; + }) + .when(client) + .acquireLease(Mockito.anyString(), Mockito.anyInt(), + Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + + new Thread(() -> { + while (!leaseAcquired.get()) ; + try { + fs.rename(src, dst); + } catch (Exception e) { + if (e.getCause() instanceof AbfsLease.LeaseException + && e.getCause().getCause() instanceof AbfsRestOperationException && + ((AbfsRestOperationException) e.getCause() + .getCause()).getStatusCode() == HTTP_CONFLICT) { + exceptionOnParallelRename.set(true); + } + } finally { + parallelThreadDone.set(true); + } + }).start(); + fs.rename(src, dst); + while (!parallelThreadDone.get()) ; + Assertions.assertThat(exceptionOnParallelRename.get()) + .describedAs("Parallel rename should fail") + .isTrue(); + } + + @Test + public void testAppendAtomicBlobDuringRename() throws Exception { + AzureBlobFileSystem fs = Mockito.spy(getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fs); + AbfsBlobClient client = (AbfsBlobClient) addSpyHooksOnClient(fs); + + Path src = new Path("/hbase/src"); + Path dst = new Path("/hbase/dst"); + FSDataOutputStream os = fs.create(src); + + AtomicBoolean copyInProgress = new AtomicBoolean(false); + AtomicBoolean outputStreamClosed = new AtomicBoolean(false); + AtomicBoolean appendFailed = new AtomicBoolean(false); + Mockito.doAnswer(answer -> { + copyInProgress.set(true); + while (!outputStreamClosed.get()) ; + return answer.callRealMethod(); + }).when(client).copyBlob(Mockito.any(Path.class), Mockito.any(Path.class), + Mockito.nullable(String.class), Mockito.any(TracingContext.class)); + new Thread(() -> { + while (!copyInProgress.get()) ; + try { + os.write(1); + os.close(); + } catch (IOException e) { + appendFailed.set(true); + } finally { + outputStreamClosed.set(true); + } + 
}).start(); + + fs.rename(src, dst); + + Assertions.assertThat(appendFailed.get()) + .describedAs("Append should fail") + .isTrue(); + } + + @Test + public void testBlobRenameOfDirectoryHavingNeighborWithSamePrefix() + throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + assumeNonHnsAccountBlobEndpoint(fs); + fs.mkdirs(new Path("/testDir/dir")); + fs.mkdirs(new Path("/testDir/dirSamePrefix")); + fs.create(new Path("/testDir/dir/file1")); + fs.create(new Path("/testDir/dir/file2")); + + fs.create(new Path("/testDir/dirSamePrefix/file1")); + fs.create(new Path("/testDir/dirSamePrefix/file2")); + + fs.rename(new Path("/testDir/dir"), new Path("/testDir/dir2")); + + Assertions.assertThat(fs.exists(new Path("/testDir/dirSamePrefix/file1"))) + .isTrue(); + Assertions.assertThat(fs.exists(new Path("/testDir/dir/file1"))) + .isFalse(); + Assertions.assertThat(fs.exists(new Path("/testDir/dir/file2"))) + .isFalse(); + Assertions.assertThat(fs.exists(new Path("/testDir/dir/"))) + .isFalse(); + } + + @Test + public void testBlobRenameWithListGivingPaginatedResultWithOneObjectPerList() + throws Exception { + AzureBlobFileSystem fs = Mockito.spy(getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fs); + AbfsBlobClient spiedClient = (AbfsBlobClient) addSpyHooksOnClient(fs); + + fs.mkdirs(new Path("/testDir/dir1")); + for (int i = 0; i < 10; i++) { + fs.create(new Path("/testDir/dir1/file" + i)); + } + + Mockito.doAnswer(answer -> { + String path = answer.getArgument(0); + boolean recursive = answer.getArgument(1); + String continuation = answer.getArgument(3); + TracingContext context = answer.getArgument(4); + + return getFileSystem().getAbfsClient() + .listPath(path, recursive, 1, continuation, context); + }) + .when(spiedClient) + .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), + Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + + fs.rename(new Path("/testDir/dir1"), new Path("/testDir/dir2")); + + for (int i = 0; i < 10; i++) { + Assertions.assertThat(fs.exists(new Path("/testDir/dir2/file" + i))) + .describedAs("File " + i + " should exist in /testDir/dir2") + .isTrue(); + } + } + + /** + * Assert that Rename operation failure should stop List producer. 
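+   * Mirrors the delete-side producer test: copyBlob fails with HTTP 403, and
+   * the producer's listPath must be invoked at most twice, once before
+   * consumption starts and at most once more before the failure stops it.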
+ */ + @Test + public void testProducerStopOnRenameFailure() throws Exception { + AzureBlobFileSystem fs = Mockito.spy(getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fs); + + fs.mkdirs(new Path("/src")); + ExecutorService executorService = Executors.newFixedThreadPool(10); + List futureList = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + int iter = i; + Future future = executorService.submit(() -> { + try { + fs.create(new Path("/src/file" + iter)); + } catch (IOException ex) {} + }); + futureList.add(future); + } + + for (Future future : futureList) { + future.get(); + } + + AbfsBlobClient client = (AbfsBlobClient) fs.getAbfsClient(); + AbfsBlobClient spiedClient = Mockito.spy(client); + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + store.setClient(spiedClient); + Mockito.doReturn(store).when(fs).getAbfsStore(); + + final int[] copyCallInvocation = new int[1]; + copyCallInvocation[0] = 0; + Mockito.doAnswer(answer -> { + throw new AbfsRestOperationException(HTTP_FORBIDDEN, "", "", + new Exception()); + }).when(spiedClient) + .copyBlob(Mockito.any(Path.class), Mockito.any(Path.class), + Mockito.nullable(String.class), Mockito.any(TracingContext.class)); + + AbfsClientTestUtil.mockGetRenameBlobHandler(spiedClient, + (blobRenameHandler) -> { + Mockito.doAnswer(answer -> { + try { + answer.callRealMethod(); + } catch (AbfsRestOperationException ex) { + if (ex.getStatusCode() == HTTP_FORBIDDEN) { + copyCallInvocation[0]++; + } + throw ex; + } + throw new AssertionError("List Consumption should have failed"); + }) + .when(blobRenameHandler).listRecursiveAndTakeAction(); + return null; + }); + + final int[] listCallInvocation = new int[1]; + listCallInvocation[0] = 0; + Mockito.doAnswer(answer -> { + if (answer.getArgument(0).equals("/src")) { + if (listCallInvocation[0] == 1) { + while (copyCallInvocation[0] == 0) ; + } + listCallInvocation[0]++; + return getFileSystem().getAbfsClient().listPath(answer.getArgument(0), + answer.getArgument(1), 1, + answer.getArgument(3), answer.getArgument(4)); + } + return answer.callRealMethod(); + }) + .when(spiedClient) + .listPath(Mockito.anyString(), Mockito.anyBoolean(), Mockito.anyInt(), + Mockito.nullable(String.class), Mockito.any(TracingContext.class)); + + intercept(AccessDeniedException.class, + () -> { + fs.rename(new Path("/src"), new Path("/dst")); + }); + + Assertions.assertThat(listCallInvocation[0]) + .describedAs("List on src should have been invoked at-most twice." + + "One before consumption and the other after consumption has starting." 
+ + "Once consumption fails, listing would be stopped.") + .isLessThanOrEqualTo(2); + } + + @Test + public void testRenameResumeThroughListStatusWithSrcDirDeletedJustBeforeResume() + throws Exception { + AzureBlobFileSystem fs = Mockito.spy(getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fs); + AbfsBlobClient client = (AbfsBlobClient) addSpyHooksOnClient(fs); + fs.setWorkingDirectory(new Path(ROOT_PATH)); + + Path srcPath = new Path("hbase/test1/"); + Path failurePath = new Path(srcPath, "file"); + fs.mkdirs(srcPath); + fs.create(failurePath); + + crashRename(fs, client, srcPath.toUri().getPath()); + fs.delete(srcPath, true); + AtomicInteger copiedBlobs = new AtomicInteger(0); + Mockito.doAnswer(answer -> { + copiedBlobs.incrementAndGet(); + return answer.callRealMethod(); + }).when(client).copyBlob(Mockito.any(Path.class), Mockito.any(Path.class), + Mockito.nullable(String.class), Mockito.any(TracingContext.class)); + fs.listStatus(new Path("hbase")); + + Assertions.assertThat(copiedBlobs.get()) + .describedAs("No Copy on resume") + .isEqualTo(0); + } + + @Test + public void testRenameResumeThroughListStatusWithSrcDirETagChangedJustBeforeResume() + throws Exception { + AzureBlobFileSystem fs = Mockito.spy(getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fs); + AbfsBlobClient client = (AbfsBlobClient) addSpyHooksOnClient(fs); + fs.setWorkingDirectory(new Path(ROOT_PATH)); + + Path srcPath = new Path("hbase/test1/"); + Path failurePath = new Path(srcPath, "file"); + fs.mkdirs(srcPath); + fs.create(failurePath); + + crashRename(fs, client, srcPath.toUri().getPath() + ); + fs.delete(srcPath, true); + fs.mkdirs(srcPath); + AtomicInteger copiedBlobs = new AtomicInteger(0); + Mockito.doAnswer(answer -> { + copiedBlobs.incrementAndGet(); + return answer.callRealMethod(); + }).when(client).copyBlob(Mockito.any(Path.class), Mockito.any(Path.class), + Mockito.nullable(String.class), Mockito.any(TracingContext.class)); + + AtomicInteger pendingJsonDeleted = new AtomicInteger(0); + Mockito.doAnswer(listAnswer -> { + Path path = listAnswer.getArgument(0); + if (path.toUri().getPath().endsWith(SUFFIX)) { + pendingJsonDeleted.incrementAndGet(); + } + return listAnswer.callRealMethod(); + }) + .when(client) + .deleteBlobPath(Mockito.any(Path.class), Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + + fs.listStatus(new Path("/hbase")); + + Assertions.assertThat(copiedBlobs.get()) + .describedAs("No Copy on resume") + .isEqualTo(0); + Assertions.assertThat(pendingJsonDeleted.get()) + .describedAs("RenamePendingJson should be deleted") + .isEqualTo(1); + } + + @Test + public void testRenameResumeThroughGetStatusWithSrcDirETagChangedJustBeforeResume() + throws Exception { + AzureBlobFileSystem fs = Mockito.spy(getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fs); + AbfsBlobClient client = (AbfsBlobClient) addSpyHooksOnClient(fs); + fs.setWorkingDirectory(new Path(ROOT_PATH)); + + Path srcPath = new Path("hbase/test1/"); + Path failurePath = new Path(srcPath, "file"); + fs.mkdirs(srcPath); + fs.create(failurePath); + + crashRename(fs, client, srcPath.toUri().getPath() + ); + fs.delete(srcPath, true); + fs.mkdirs(srcPath); + AtomicInteger copiedBlobs = new AtomicInteger(0); + Mockito.doAnswer(answer -> { + copiedBlobs.incrementAndGet(); + return answer.callRealMethod(); + }).when(client).copyBlob(Mockito.any(Path.class), Mockito.any(Path.class), + Mockito.nullable(String.class), Mockito.any(TracingContext.class)); + + AtomicInteger pendingJsonDeleted = new AtomicInteger(0); + 
Mockito.doAnswer(listAnswer -> { + Path path = listAnswer.getArgument(0); + if (path.toUri().getPath().endsWith(SUFFIX)) { + pendingJsonDeleted.incrementAndGet(); + } + return listAnswer.callRealMethod(); + }) + .when(client) + .deleteBlobPath(Mockito.any(Path.class), Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + + Assertions.assertThat(fs.exists(srcPath)) + .describedAs("Source should exist") + .isTrue(); + + Assertions.assertThat(copiedBlobs.get()) + .describedAs("No Copy on resume") + .isEqualTo(0); + Assertions.assertThat(pendingJsonDeleted.get()) + .describedAs("RenamePendingJson should be deleted") + .isEqualTo(1); + } + + /** + * Test to assert that the CID in src marker blob copy and delete contains the + * total number of blobs operated in the rename directory. + * Also, to assert that all operations in the rename-directory flow have same + * primaryId and opType. + */ + @Test + public void testRenameSrcDirDeleteEmitDeletionCountInClientRequestId() + throws Exception { + AzureBlobFileSystem fs = Mockito.spy(getFileSystem()); + assumeNonHnsAccountBlobEndpoint(fs); + AbfsBlobClient client = (AbfsBlobClient) addSpyHooksOnClient(fs); + + String dirPathStr = "/testDir/dir1"; + fs.mkdirs(new Path(dirPathStr)); + ExecutorService executorService = Executors.newFixedThreadPool(5); + List futures = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + final int iter = i; + Future future = executorService.submit(() -> { + return fs.create(new Path("/testDir/dir1/file" + iter)); + }); + futures.add(future); + } + + for (Future future : futures) { + future.get(); + } + executorService.shutdown(); + + final TracingHeaderValidator tracingHeaderValidator + = new TracingHeaderValidator( + fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.RENAME, true, 0); + fs.registerListener(tracingHeaderValidator); + + Mockito.doAnswer(copyAnswer -> { + if (dirPathStr.equalsIgnoreCase( + ((Path) copyAnswer.getArgument(0)).toUri().getPath())) { + tracingHeaderValidator.setOperatedBlobCount(11); + return copyAnswer.callRealMethod(); + } + return copyAnswer.callRealMethod(); + }) + .when(client) + .copyBlob(Mockito.any(Path.class), Mockito.any(Path.class), + Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + + Mockito.doAnswer(deleteAnswer -> { + if (dirPathStr.equalsIgnoreCase( + ((Path) deleteAnswer.getArgument(0)).toUri().getPath())) { + Object result = deleteAnswer.callRealMethod(); + tracingHeaderValidator.setOperatedBlobCount(null); + return result; + } + return deleteAnswer.callRealMethod(); + }) + .when(client) + .deleteBlobPath(Mockito.any(Path.class), + Mockito.nullable(String.class), + Mockito.any(TracingContext.class)); + + fs.rename(new Path(dirPathStr), new Path("/dst/")); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRenameUnicode.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRenameUnicode.java index f913da7b15ed0..a64fab2ee6e6a 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRenameUnicode.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRenameUnicode.java @@ -20,19 +20,26 @@ import java.util.Arrays; +import org.assertj.core.api.Assertions; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import 
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRenameUnicode.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRenameUnicode.java
index f913da7b15ed0..a64fab2ee6e6a 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRenameUnicode.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRenameUnicode.java
@@ -20,19 +20,26 @@
 import java.util.Arrays;
 
+import org.assertj.core.api.Assertions;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIOException;
+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
+import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient;
 
+import static java.net.HttpURLConnection.HTTP_BAD_REQUEST;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COLON;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsDirectory;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsFile;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathDoesNotExist;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathExists;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.assertRenameOutcome;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
 
 /**
  * Parameterized test of rename operations of unicode paths.
@@ -84,6 +91,19 @@ public void testRenameFileUsingUnicode() throws Exception {
     assertIsFile(fs, filePath);
 
     Path folderPath2 = new Path(destDir);
+    if (getFileSystem().getAbfsClient() instanceof AbfsBlobClient
+        && destDir.contains(COLON)) {
+      AbfsRestOperationException ex = intercept(
+          AbfsRestOperationException.class, () -> {
+            fs.rename(folderPath1, folderPath2);
+            return null;
+          });
+      Assertions.assertThat(ex.getCause())
+          .isInstanceOf(PathIOException.class);
+      Assertions.assertThat(ex.getStatusCode())
+          .isEqualTo(HTTP_BAD_REQUEST);
+      return;
+    }
     assertRenameOutcome(fs, folderPath1, folderPath2, true);
     assertPathDoesNotExist(fs, "renamed", folderPath1);
     assertIsDirectory(fs, folderPath2);
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java
index 2b60cb57fdf39..fac0e3d754808 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java
@@ -37,6 +37,7 @@
 import org.mockito.stubbing.Answer;
 
 import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
+import org.apache.hadoop.util.functional.CallableRaisingIOE;
 import org.apache.hadoop.util.functional.FunctionRaisingIOE;
 
 import static java.net.HttpURLConnection.HTTP_OK;
@@ -201,4 +202,37 @@ public Object answer(final InvocationOnMock invocationOnMock)
         Mockito.anyString(), Mockito.any(URL.class), Mockito.anyList(),
         Mockito.nullable(String.class));
   }
+
+  public static void mockGetDeleteBlobHandler(AbfsBlobClient blobClient,
+      FunctionRaisingIOE<BlobDeleteHandler, Void> functionRaisingIOE) {
+    Mockito.doAnswer(answer -> {
+      BlobDeleteHandler blobDeleteHandler = Mockito.spy(
+          (BlobDeleteHandler) answer.callRealMethod());
+      Mockito.doAnswer(answer1 -> {
+        functionRaisingIOE.apply(blobDeleteHandler);
+        return answer1.callRealMethod();
+      }).when(blobDeleteHandler).execute();
+      return blobDeleteHandler;
+    })
+        .when(blobClient)
+        .getBlobDeleteHandler(Mockito.anyString(), Mockito.anyBoolean(),
+            Mockito.any(TracingContext.class));
+  }
+
+  public static void mockGetRenameBlobHandler(AbfsBlobClient blobClient,
+      FunctionRaisingIOE<BlobRenameHandler, Void> functionRaisingIOE) {
+    Mockito.doAnswer(answer -> {
+      BlobRenameHandler blobRenameHandler = Mockito.spy(
+          (BlobRenameHandler) answer.callRealMethod());
+      Mockito.doAnswer(answer1 -> {
+        functionRaisingIOE.apply(blobRenameHandler);
+        return answer1.callRealMethod();
+      }).when(blobRenameHandler).execute();
+      return blobRenameHandler;
+    })
+        .when(blobClient)
+        .getBlobRenameHandler(Mockito.anyString(), Mockito.anyString(),
+            Mockito.nullable(String.class), Mockito.anyBoolean(),
+            Mockito.any(TracingContext.class));
+  }
 }
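These hooks run an arbitrary IOE-raising callback against the spied handler just before the stubbed execute() proceeds. One plausible use (assumed, not shown in this patch): have the callback throw, so BlobRenameHandler#execute() fails before any blob is copied. The status code and error strings below are arbitrary; HTTP_INTERNAL_ERROR is the java.net.HttpURLConnection constant.

// Hypothetical usage: fail every rename at the start of execute().
AbfsClientTestUtil.mockGetRenameBlobHandler(blobClient, renameHandler -> {
  throw new AbfsRestOperationException(HTTP_INTERNAL_ERROR,
      "SimulatedFailure", "Simulated rename crash", new IOException());
});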
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java
index f3a1bf18891cc..49a58bbde43c7 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java
@@ -366,7 +366,6 @@ public static AbfsClient getMockAbfsClient(AbfsClient baseAbfsClientInstance,
         (currentAuthType == AuthType.SharedKey)
         || (currentAuthType == AuthType.OAuth));
 
-    // Todo: [FnsOverBlob] Update later to work with Blob Endpoint as well.
     AbfsClient client = mock(AbfsDfsClient.class);
     AbfsPerfTracker tracker = new AbfsPerfTracker(
         "test",
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsPaginatedDelete.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsPaginatedDelete.java
index 5dd92f430e059..c34c6cfa013e7 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsPaginatedDelete.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsPaginatedDelete.java
@@ -23,6 +23,7 @@
 import java.util.UUID;
 
 import org.assertj.core.api.Assertions;
+import org.junit.Assume;
 import org.junit.Test;
 import org.mockito.Mockito;
 
@@ -141,6 +142,8 @@ private void setTestUserConf(Configuration conf, String key, String value) {
    */
   @Test
   public void testRecursiveDeleteWithPagination() throws Exception {
+    Assume.assumeTrue(
+        getFileSystem().getAbfsStore().getClient() instanceof AbfsDfsClient);
     testRecursiveDeleteWithPaginationInternal(false, true,
         AbfsHttpConstants.ApiVersion.DEC_12_2019);
     testRecursiveDeleteWithPaginationInternal(false, true,
@@ -163,6 +166,8 @@ public void testNonRecursiveDeleteWithPagination() throws Exception {
+    Assume.assumeTrue(
+        getFileSystem().getAbfsStore().getClient() instanceof AbfsDfsClient);
     testNonRecursiveDeleteWithPaginationInternal(true);
     testNonRecursiveDeleteWithPaginationInternal(false);
   }
@@ -173,6 +178,8 @@ public void testRecursiveDeleteWithInvalidCT() throws Exception {
+    Assume.assumeTrue(
+        getFileSystem().getAbfsStore().getClient() instanceof AbfsDfsClient);
     testRecursiveDeleteWithInvalidCTInternal(true);
     testRecursiveDeleteWithInvalidCTInternal(false);
   }
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/RenameAtomicityTestUtils.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/RenameAtomicityTestUtils.java
new file mode 100644
index 0000000000000..c6b31babd0aa3
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/RenameAtomicityTestUtils.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import org.mockito.Mockito;
+import org.mockito.stubbing.Answer;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
+import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
+
+public class RenameAtomicityTestUtils {
+
+  /**
+   * Creates spies of {@link BlobRenameHandler} and {@link RenameAtomicity}
+   * and adds mocked behavior to
+   * {@link RenameAtomicity#createRenamePendingJson(Path, byte[])}.
+   *
+   * @param client client that would supply BlobRenameHandler and RenameAtomicity.
+   * @param answer mocked behavior for {@link RenameAtomicity#createRenamePendingJson(Path, byte[])}.
+   */
+  public static void addCreatePathMock(AbfsBlobClient client, Answer answer) {
+    Mockito.doAnswer(clientHandlerAns -> {
+      BlobRenameHandler renameHandler = Mockito.spy(
+          (BlobRenameHandler) clientHandlerAns.callRealMethod());
+      Mockito.doAnswer(getRenameAtomicityAns -> {
+        RenameAtomicity renameAtomicity = Mockito.spy(
+            (RenameAtomicity) getRenameAtomicityAns.callRealMethod());
+        Mockito.doAnswer(answer)
+            .when(renameAtomicity)
+            .createRenamePendingJson(Mockito.any(Path.class),
+                Mockito.any(byte[].class));
+        return renameAtomicity;
+      })
+          .when(renameHandler)
+          .getRenameAtomicity(Mockito.any(PathInformation.class));
+      return renameHandler;
+    })
+        .when(client)
+        .getBlobRenameHandler(Mockito.anyString(), Mockito.anyString(),
+            Mockito.nullable(String.class), Mockito.anyBoolean(),
+            Mockito.any(TracingContext.class));
+  }
+
+  /**
+   * Adds mocked behavior to {@link RenameAtomicity#readRenamePendingJson(Path, int)}.
+   *
+   * @param redoRenameAtomicity the spied {@link RenameAtomicity} whose read is
+   * to be stubbed.
+   * @param answer mocked behavior for {@link RenameAtomicity#readRenamePendingJson(Path, int)}.
+   *
+   * @throws AzureBlobFileSystemException server error or error from mocked behavior.
+   */
+  public static void addReadPathMock(RenameAtomicity redoRenameAtomicity,
+      Answer answer)
+      throws AzureBlobFileSystemException {
+    Mockito.doAnswer(answer)
+        .when(redoRenameAtomicity)
+        .readRenamePendingJson(Mockito.any(Path.class), Mockito.anyInt());
+  }
+}
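A usage sketch for addCreatePathMock (assumed, not part of this patch): failing createRenamePendingJson simulates a crash while the -RenamePending.json marker itself is being written, before the directory copy begins. The error code, message, and status below are arbitrary placeholders.

// Hypothetical: the marker write dies mid-flight.
RenameAtomicityTestUtils.addCreatePathMock(blobClient, invocation -> {
  throw new AbfsRestOperationException(HTTP_INTERNAL_ERROR,
      "SimulatedFailure", "Simulated marker-write failure", new IOException());
});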
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java
index 1c53e62dd58bc..0b9f57b702b36 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java
@@ -29,6 +29,7 @@
 import org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys;
 import org.apache.hadoop.fs.statistics.IOStatistics;
 import org.assertj.core.api.Assertions;
+import org.assertj.core.api.Assumptions;
 import org.junit.Assume;
 import org.junit.Test;
 import org.mockito.Mockito;
@@ -429,6 +430,9 @@ public void testExistingPathCorrectlyRejected() throws Exception {
   @Test
   public void testRenameRecoveryUnsupportedForFlatNamespace() throws Exception {
     Assume.assumeTrue(!isNamespaceEnabled);
+    // On the DFS endpoint, renamePath is a single O(1) API call, so an
+    // idempotency issue can arise on retry. On the Blob endpoint, the client
+    // orchestrates the rename itself, so this recovery path does not apply.
+    Assumptions.assumeThat(getFileSystem().getAbfsStore().getClient())
+        .isInstanceOf(AbfsDfsClient.class);
     AzureBlobFileSystem fs = getFileSystem();
     AzureBlobFileSystemStore abfsStore = fs.getAbfsStore();
     TracingContext testTracingContext = getTestTracingContext(fs, false);
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DelegationSASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DelegationSASGenerator.java
index 64aed941c1f2c..407464d11ad4f 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DelegationSASGenerator.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DelegationSASGenerator.java
@@ -79,12 +79,19 @@ public String getDelegationSAS(String accountName, String containerName, String
       case SASTokenProvider.LIST_OPERATION:
         sp = "l";
         break;
+      case SASTokenProvider.COPY_BLOB_DESTINATION:
+        sp = "w";
+        break;
+      case SASTokenProvider.COPY_BLOB_SOURCE:
+        sp = "r";
+        break;
       case SASTokenProvider.LIST_BLOB_OPERATION:
         sp = "l";
         sr = "c";
         break;
       case SASTokenProvider.GET_PROPERTIES_OPERATION:
       case SASTokenProvider.READ_OPERATION:
+      case SASTokenProvider.GET_BLOCK_LIST:
         sp = "r";
         break;
       case SASTokenProvider.RENAME_DESTINATION_OPERATION:
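The assumption added to TestAbfsRenameRetryRecovery above, and the new COPY_BLOB_SOURCE/COPY_BLOB_DESTINATION SAS permissions, both stem from how differently the two endpoints execute a directory rename: the DFS endpoint does it in one server-side renamePath call, while the Blob endpoint's client issues per-blob copies and deletes itself. A conceptual sketch of that orchestration; the helper names (listBlobs, copyBlob, deleteBlob, mapToDestination) are entirely hypothetical, not the BlobRenameHandler API.

// Conceptual sketch only; helpers are illustrative placeholders.
void renameDirectoryOverBlob(Path src, Path dst) throws IOException {
  // Each copy needs read on the source and write on the destination,
  // which is what the new SAS permission cases above grant.
  for (Path blob : listBlobs(src)) {
    copyBlob(blob, mapToDestination(blob, src, dst));
    deleteBlob(blob);
  }
  deleteBlob(src); // finally remove the source directory marker
}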
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java
index 4e09e4b899728..994c698f632ee 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java
@@ -41,6 +41,8 @@ public class TracingHeaderValidator implements Listener {
   private String ingressHandler = null;
   private String position = null;
 
+  private Integer operatedBlobCount = null;
+
   @Override
   public void callTracingHeaderValidator(String tracingContextHeader,
       TracingHeaderFormat format) {
@@ -54,6 +56,7 @@ public TracingHeaderValidator getClone() {
         clientCorrelationId, fileSystemId, operation, needsPrimaryRequestId,
         retryNum, streamID);
     tracingHeaderValidator.primaryRequestId = primaryRequestId;
+    tracingHeaderValidator.operatedBlobCount = operatedBlobCount;
     tracingHeaderValidator.ingressHandler = ingressHandler;
     tracingHeaderValidator.position = position;
     return tracingHeaderValidator;
@@ -82,6 +85,13 @@ private void validateTracingHeader(String tracingContextHeader) {
     if (format != TracingHeaderFormat.ALL_ID_FORMAT) {
       return;
     }
+    if (idList.length >= 8 && operatedBlobCount != null) {
+      Assertions.assertThat(Integer.parseInt(idList[7]))
+          .describedAs("OperatedBlobCount is incorrect")
+          .isEqualTo(operatedBlobCount);
+    }
     if (!primaryRequestId.isEmpty() && !idList[3].isEmpty()) {
       Assertions.assertThat(idList[3])
           .describedAs("PrimaryReqID should be common for these requests")
@@ -96,7 +106,7 @@ private void validateTracingHeader(String tracingContextHeader) {
 
   private void validateBasicFormat(String[] idList) {
     if (format == TracingHeaderFormat.ALL_ID_FORMAT) {
-      int expectedSize = 7;
+      int expectedSize = operatedBlobCount == null ? 7 : 8;
       if (ingressHandler != null) {
         expectedSize += 2;
       }
@@ -162,6 +172,10 @@ public void updatePrimaryRequestID(String primaryRequestId) {
     this.primaryRequestId = primaryRequestId;
   }
 
+  public void setOperatedBlobCount(Integer operatedBlobCount) {
+    this.operatedBlobCount = operatedBlobCount;
+  }
+
   @Override
   public void updateIngressHandler(String ingressHandler) {
     this.ingressHandler = ingressHandler;
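A usage sketch tying the validator to the rename test earlier in this patch; the constructor and listener calls match the patch, while the variables (clientCorrelationId, fileSystemId, srcDir, dstDir) are placeholders the caller must supply.

// Illustrative wiring: validate that directory-rename requests carry the
// operated-blob-count field, mirroring the rename test above.
TracingHeaderValidator validator = new TracingHeaderValidator(
    clientCorrelationId, fileSystemId, FSOperationType.RENAME,
    true /* needsPrimaryRequestId */, 0 /* retryNum */);
fs.registerListener(validator);
validator.setOperatedBlobCount(11);   // 1 directory marker + 10 child files
fs.rename(srcDir, dstDir);            // each request's header is validated
validator.setOperatedBlobCount(null); // later ops revert to the 7-field form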