diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index 3c99eb4070d8c..88f5de8686011 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -478,9 +478,9 @@ public boolean isDfsToBlobFallbackEnabled() { public void validateConfiguredServiceType(boolean isHNSEnabled) throws InvalidConfigurationValueException { // Todo: [FnsOverBlob] - Remove this check, Failing FS Init with Blob Endpoint Until FNS over Blob is ready. - if (getFsConfiguredServiceType() == AbfsServiceType.BLOB) { - throw new InvalidConfigurationValueException(FS_DEFAULT_NAME_KEY, "Blob Endpoint Support not yet available"); - } +// if (getFsConfiguredServiceType() == AbfsServiceType.BLOB) { +// throw new InvalidConfigurationValueException(FS_DEFAULT_NAME_KEY, "Blob Endpoint Support not yet available"); +// } if (isHNSEnabled && getConfiguredServiceTypeForFNSAccounts() == AbfsServiceType.BLOB) { throw new InvalidConfigurationValueException( FS_AZURE_FNS_ACCOUNT_SERVICE_TYPE, "Cannot be BLOB for HNS Account"); @@ -758,6 +758,10 @@ public boolean isSmallWriteOptimizationEnabled() { return this.enableSmallWriteOptimization; } + public void setSmallWriteOptimization(final boolean enableSmallWriteOptimization) { + this.enableSmallWriteOptimization = enableSmallWriteOptimization; + } + public boolean readSmallFilesCompletely() { return this.readSmallFilesCompletely; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index a238347ed0b42..6f577f63d3d2a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -607,9 +607,10 @@ public OutputStream createFile(final Path path, final FsPermission permission, final FsPermission umask, TracingContext tracingContext) throws IOException { try (AbfsPerfInfo perfInfo = startTracking("createFile", "createPath")) { + AbfsClient createClient = getClientHandler().getIngressClient(); boolean isNamespaceEnabled = getIsNamespaceEnabled(tracingContext); LOG.debug("createFile filesystem: {} path: {} overwrite: {} permission: {} umask: {} isNamespaceEnabled: {}", - getClient().getFileSystem(), + createClient.getFileSystem(), path, overwrite, permission, @@ -632,9 +633,9 @@ public OutputStream createFile(final Path path, } final ContextEncryptionAdapter contextEncryptionAdapter; - if (getClient().getEncryptionType() == EncryptionType.ENCRYPTION_CONTEXT) { + if (createClient.getEncryptionType() == EncryptionType.ENCRYPTION_CONTEXT) { contextEncryptionAdapter = new ContextProviderEncryptionAdapter( - getClient().getEncryptionContextProvider(), getRelativePath(path)); + createClient.getEncryptionContextProvider(), getRelativePath(path)); } else { contextEncryptionAdapter = NoContextEncryptionAdapter.getInstance(); } @@ -649,27 +650,28 @@ public OutputStream createFile(final Path path, ); } else { - op = getClient().createPath(relativePath, true, + op = createClient.createPath(relativePath, true, overwrite, new Permissions(isNamespaceEnabled, permission, umask), isAppendBlob, null, 
contextEncryptionAdapter, - tracingContext); + tracingContext, isNamespaceEnabled); } perfInfo.registerResult(op.getResult()).registerSuccess(true); AbfsLease lease = maybeCreateLease(relativePath, tracingContext); - + String eTag = extractEtagHeader(op.getResult()); return new AbfsOutputStream( populateAbfsOutputStreamContext( isAppendBlob, lease, - getClient(), + getClientHandler(), statistics, relativePath, 0, + eTag, contextEncryptionAdapter, tracingContext)); } @@ -692,13 +694,13 @@ private AbfsRestOperation conditionalCreateOverwriteFile(final String relativePa final ContextEncryptionAdapter contextEncryptionAdapter, final TracingContext tracingContext) throws IOException { AbfsRestOperation op; - + AbfsClient createClient = getClientHandler().getIngressClient(); try { // Trigger a create with overwrite=false first so that eTag fetch can be // avoided for cases when no pre-existing file is present (major portion // of create file traffic falls into the case of no pre-existing file). - op = getClient().createPath(relativePath, true, false, permissions, - isAppendBlob, null, contextEncryptionAdapter, tracingContext); + op = createClient.createPath(relativePath, true, false, permissions, + isAppendBlob, null, contextEncryptionAdapter, tracingContext, getIsNamespaceEnabled(tracingContext)); } catch (AbfsRestOperationException e) { if (e.getStatusCode() == HttpURLConnection.HTTP_CONFLICT) { @@ -722,8 +724,8 @@ private AbfsRestOperation conditionalCreateOverwriteFile(final String relativePa try { // overwrite only if eTag matches with the file properties fetched before - op = getClient().createPath(relativePath, true, true, permissions, - isAppendBlob, eTag, contextEncryptionAdapter, tracingContext); + op = createClient.createPath(relativePath, true, true, permissions, + isAppendBlob, eTag, contextEncryptionAdapter, tracingContext, getIsNamespaceEnabled(tracingContext)); } catch (AbfsRestOperationException ex) { if (ex.getStatusCode() == HttpURLConnection.HTTP_PRECON_FAILED) { // Is a parallel access case, as file with eTag was just queried @@ -750,7 +752,7 @@ private AbfsRestOperation conditionalCreateOverwriteFile(final String relativePa * * @param isAppendBlob is Append blob support enabled? * @param lease instance of AbfsLease for this AbfsOutputStream. - * @param client AbfsClient. + * @param clientHandler AbfsClientHandler. * @param statistics FileSystem statistics. * @param path Path for AbfsOutputStream.
* @param position Position or offset of the file being opened, set to 0 @@ -762,10 +764,11 @@ private AbfsRestOperation conditionalCreateOverwriteFile(final String relativePa private AbfsOutputStreamContext populateAbfsOutputStreamContext( boolean isAppendBlob, AbfsLease lease, - AbfsClient client, + AbfsClientHandler clientHandler, FileSystem.Statistics statistics, String path, long position, + String eTag, ContextEncryptionAdapter contextEncryptionAdapter, TracingContext tracingContext) { int bufferSize = abfsConfiguration.getWriteBufferSize(); @@ -786,7 +789,7 @@ private AbfsOutputStreamContext populateAbfsOutputStreamContext( .withEncryptionAdapter(contextEncryptionAdapter) .withBlockFactory(getBlockFactory()) .withBlockOutputActiveBlocks(blockOutputActiveBlocks) - .withClient(getClient()) + .withClientHandler(clientHandler) .withPosition(position) .withFsStatistics(statistics) .withPath(path) @@ -794,6 +797,9 @@ private AbfsOutputStreamContext populateAbfsOutputStreamContext( blockOutputActiveBlocks, true)) .withTracingContext(tracingContext) .withAbfsBackRef(fsBackRef) + .withIngressServiceType(abfsConfiguration.getIngressServiceType()) + .withDFSToBlobFallbackEnabled(abfsConfiguration.isDfsToBlobFallbackEnabled()) + .withETag(eTag) .build(); } @@ -801,9 +807,10 @@ public void createDirectory(final Path path, final FsPermission permission, final FsPermission umask, TracingContext tracingContext) throws IOException { try (AbfsPerfInfo perfInfo = startTracking("createDirectory", "createPath")) { + AbfsClient createClient = getClientHandler().getIngressClient(); boolean isNamespaceEnabled = getIsNamespaceEnabled(tracingContext); LOG.debug("createDirectory filesystem: {} path: {} permission: {} umask: {} isNamespaceEnabled: {}", - getClient().getFileSystem(), + createClient.getFileSystem(), path, permission, umask, @@ -813,8 +820,8 @@ public void createDirectory(final Path path, final FsPermission permission, !isNamespaceEnabled || abfsConfiguration.isEnabledMkdirOverwrite(); Permissions permissions = new Permissions(isNamespaceEnabled, permission, umask); - final AbfsRestOperation op = getClient().createPath(getRelativePath(path), - false, overwrite, permissions, false, null, null, tracingContext); + final AbfsRestOperation op = createClient.createPath(getRelativePath(path), + false, overwrite, permissions, false, null, null, tracingContext, isNamespaceEnabled); perfInfo.registerResult(op.getResult()).registerSuccess(true); } } @@ -949,6 +956,7 @@ public OutputStream openFileForWrite(final Path path, overwrite); String relativePath = getRelativePath(path); + AbfsClient writeClient = getClientHandler().getIngressClient(); final AbfsRestOperation op = getClient() .getPathStatus(relativePath, false, tracingContext, null); @@ -961,7 +969,7 @@ public OutputStream openFileForWrite(final Path path, throw new AbfsRestOperationException( AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(), AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(), - "openFileForRead must be used with files and not directories", + "openFileForWrite must be used with files and not directories", null); } @@ -975,8 +983,9 @@ public OutputStream openFileForWrite(final Path path, } AbfsLease lease = maybeCreateLease(relativePath, tracingContext); + final String eTag = extractEtagHeader(op.getResult()); final ContextEncryptionAdapter contextEncryptionAdapter; - if (getClient().getEncryptionType() == EncryptionType.ENCRYPTION_CONTEXT) { + if (writeClient.getEncryptionType() == EncryptionType.ENCRYPTION_CONTEXT) { final 
String encryptionContext = op.getResult() .getResponseHeader( HttpHeaderConfigurations.X_MS_ENCRYPTION_CONTEXT); @@ -985,20 +994,20 @@ public OutputStream openFileForWrite(final Path path, "File doesn't have encryptionContext."); } contextEncryptionAdapter = new ContextProviderEncryptionAdapter( - getClient().getEncryptionContextProvider(), getRelativePath(path), + writeClient.getEncryptionContextProvider(), getRelativePath(path), encryptionContext.getBytes(StandardCharsets.UTF_8)); } else { contextEncryptionAdapter = NoContextEncryptionAdapter.getInstance(); } - return new AbfsOutputStream( populateAbfsOutputStreamContext( isAppendBlob, lease, - getClient(), + getClientHandler(), statistics, relativePath, offset, + eTag, contextEncryptionAdapter, tracingContext)); } @@ -1442,7 +1451,7 @@ public void modifyAclEntries(final Path path, final List<AclEntry> aclSpec, final AbfsRestOperation op = getClient() .getAclStatus(relativePath, useUpn, tracingContext); perfInfoGet.registerResult(op.getResult()); - final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG); + final String eTag = extractEtagHeader(op.getResult()); final Map<String, String> aclEntries = AbfsAclHelper.deserializeAclSpec(op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_ACL)); @@ -1485,7 +1494,7 @@ public void removeAclEntries(final Path path, final List<AclEntry> aclSpec, final AbfsRestOperation op = getClient() .getAclStatus(relativePath, isUpnFormat, tracingContext); perfInfoGet.registerResult(op.getResult()); - final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG); + final String eTag = extractEtagHeader(op.getResult()); final Map<String, String> aclEntries = AbfsAclHelper.deserializeAclSpec(op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_ACL)); @@ -1523,7 +1532,7 @@ public void removeDefaultAcl(final Path path, TracingContext tracingContext) final AbfsRestOperation op = getClient() .getAclStatus(relativePath, tracingContext); perfInfoGet.registerResult(op.getResult()); - final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG); + final String eTag = extractEtagHeader(op.getResult()); final Map<String, String> aclEntries = AbfsAclHelper.deserializeAclSpec(op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_ACL)); final Map<String, String> defaultAclEntries = new HashMap<>(); @@ -1567,7 +1576,7 @@ public void removeAcl(final Path path, TracingContext tracingContext) final AbfsRestOperation op = getClient() .getAclStatus(relativePath, tracingContext); perfInfoGet.registerResult(op.getResult()); - final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG); + final String eTag = extractEtagHeader(op.getResult()); final Map<String, String> aclEntries = AbfsAclHelper.deserializeAclSpec(op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_ACL)); final Map<String, String> newAclEntries = new HashMap<>(); @@ -1613,7 +1622,7 @@ public void setAcl(final Path path, final List<AclEntry> aclSpec, final AbfsRestOperation op = getClient() .getAclStatus(relativePath, isUpnFormat, tracingContext); perfInfoGet.registerResult(op.getResult()); - final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG); + final String eTag = extractEtagHeader(op.getResult()); final Map<String, String> getAclEntries = AbfsAclHelper.deserializeAclSpec(op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_ACL));
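Editor's note on the conditional-overwrite flow in `conditionalCreateOverwriteFile` above: the store first issues a create with overwrite=false (so the common no-pre-existing-file case needs no eTag fetch), and only on a 409 conflict fetches the eTag and retries with overwrite=true under an If-Match condition. The sketch below restates that optimistic-concurrency pattern outside the ABFS types; `StoreOps`, `ConflictException`, and `PreconditionFailedException` are hypothetical stand-ins, not classes from this patch.

```java
// Minimal sketch of the create-overwrite protocol, under assumed stand-in types.
interface StoreOps {
  void create(String path, String ifMatchEtag, boolean overwrite)
      throws ConflictException, PreconditionFailedException;
  String getEtag(String path);
}

class ConflictException extends Exception {}           // maps to HTTP 409
class PreconditionFailedException extends Exception {} // maps to HTTP 412

final class ConditionalCreate {
  static void createOverwrite(StoreOps ops, String path)
      throws ConflictException, PreconditionFailedException {
    try {
      // Cheap path: assume no pre-existing file, so no eTag fetch is needed.
      ops.create(path, null, false);
    } catch (ConflictException e) {
      // File exists: fetch its eTag and overwrite only if it is unchanged.
      String eTag = ops.getEtag(path);
      try {
        ops.create(path, eTag, true);
      } catch (PreconditionFailedException ex) {
        // eTag changed between the two calls: a parallel writer won the race.
        throw ex;
      }
    }
  }
}
```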
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java index 0eb701fba5a38..af7193305e7ee 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java @@ -60,6 +60,7 @@ public final class AbfsHttpConstants { public static final String BLOCK_BLOB_TYPE = "BlockBlob"; public static final String BLOCK_TYPE_COMMITTED = "committed"; public static final String TOKEN_VERSION = "2"; + public static final String APPEND_BLOCK = "appendblock"; public static final String JAVA_VENDOR = "java.vendor"; public static final String JAVA_VERSION = "java.version"; @@ -164,7 +165,7 @@ public String toString() { } public static ApiVersion getCurrentVersion() { - return DEC_12_2019; + return AUG_03_2023; } } @@ -213,7 +214,11 @@ public static ApiVersion getCurrentVersion() { public static final String XML_TAG_BLOB_ERROR_MESSAGE_END_XML = "</Message>"; public static final String XML_TAG_COMMITTED_BLOCKS = "CommittedBlocks"; public static final String XML_TAG_BLOCK_NAME = "Block"; - + public static final String XML_VERSION = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"; + public static final String BLOCK_LIST_START_TAG = "<BlockList>\n"; + public static final String BLOCK_LIST_END_TAG = "</BlockList>\n"; + public static final String LATEST_BLOCK_FORMAT = "<Latest>%s</Latest>\n"; + public static final String PUT_BLOCK_LIST = "PutBlockList"; /** * List of configurations that are related to Customer-Provided-Keys. *
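Editor's note: the XML constants restored above are the fragments from which the blob client assembles a Put Block List request body. A minimal sketch of that assembly, using only the constants shown (the helper name `buildBlockListXml` is illustrative, not from the patch):

```java
import java.util.List;

final class BlockListXmlSketch {
  static final String XML_VERSION = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
  static final String BLOCK_LIST_START_TAG = "<BlockList>\n";
  static final String BLOCK_LIST_END_TAG = "</BlockList>\n";
  static final String LATEST_BLOCK_FORMAT = "<Latest>%s</Latest>\n";

  // Illustrative helper: emits the request body for Put Block List,
  // committing each block id under a <Latest> element.
  static String buildBlockListXml(List<String> blockIds) {
    StringBuilder sb = new StringBuilder(XML_VERSION).append(BLOCK_LIST_START_TAG);
    for (String id : blockIds) {
      sb.append(String.format(LATEST_BLOCK_FORMAT, id));
    }
    return sb.append(BLOCK_LIST_END_TAG).toString();
  }
}
```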
    diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index 0b071351ef5aa..4f498146ba895 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -168,5 +168,7 @@ public final class FileSystemConfigurations { public static final int HUNDRED = 100; public static final long THOUSAND = 1000L; + public static final int BLOCK_ID_LENGTH = 60; + private FileSystemConfigurations() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidIngressServiceException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidIngressServiceException.java new file mode 100644 index 0000000000000..94171485d084b --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidIngressServiceException.java @@ -0,0 +1,35 @@ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.contracts.exceptions; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +@InterfaceAudience.Public +@InterfaceStability.Evolving +public final class InvalidIngressServiceException + extends AbfsRestOperationException { + public InvalidIngressServiceException(final int statusCode, + final String errorCode, + final String errorMessage, + final Exception innerException) { + super(statusCode, errorCode, errorMessage, innerException); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java index 12c0b9e1473cc..26d4be7ed6bee 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java @@ -130,4 +130,12 @@ public void setRetryDueToExpect(boolean retryDueToExpect) { public void setExpectHeaderEnabled(boolean expectHeaderEnabled) { isExpectHeaderEnabled = expectHeaderEnabled; } + + public void setBlockId(final String blockId) { + this.blockId = blockId; + } + + public void setEtag(final String eTag) { + this.eTag = eTag; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java index cadba8b4e1039..62ab1744aa060 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java @@ -57,6 +57,8 @@ public enum AzureServiceErrorCode { ACCOUNT_REQUIRES_HTTPS("AccountRequiresHttps", HttpURLConnection.HTTP_BAD_REQUEST, null), MD5_MISMATCH("Md5Mismatch", HttpURLConnection.HTTP_BAD_REQUEST, "The MD5 value specified in the request did not match with the MD5 value calculated by the server."), + BLOB_OPERATION_NOT_SUPPORTED("BlobOperationNotSupported", HttpURLConnection.HTTP_CONFLICT, null), + INVALID_APPEND_OPERATION("InvalidAppendOperation", HttpURLConnection.HTTP_CONFLICT, null), UNKNOWN(null, -1, null); private final String errorCode; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java index a2cd292b0b230..0af3130143119 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java @@ -41,6 +41,7 @@ public interface SASTokenProvider { String GET_STATUS_OPERATION = "get-status"; String GET_PROPERTIES_OPERATION = "get-properties"; String LIST_OPERATION = "list"; + String LIST_BLOB_OPERATION = "list-blob"; String READ_OPERATION = "read"; String RENAME_SOURCE_OPERATION = "rename-source"; String RENAME_DESTINATION_OPERATION = "rename-destination"; @@ -49,6 +50,7 @@ public interface SASTokenProvider { String SET_PERMISSION_OPERATION = "set-permission"; String SET_PROPERTIES_OPERATION = "set-properties"; 
String WRITE_OPERATION = "write"; + String APPEND_BLOCK_OPERATION = "append-block"; /** * Initialize authorizer for Azure Blob File System. diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobBlock.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobBlock.java new file mode 100644 index 0000000000000..3086f03bb5415 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobBlock.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import org.apache.commons.codec.binary.Base64; + +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.BLOCK_ID_LENGTH; + +public class AbfsBlobBlock extends AbfsBlock { + + private final String blockId; + + /** + * Creates a block for the given output stream and generates its blockId from the offset. + * + * @param outputStream AbfsOutputStream instance. + * @param offset used to generate the blockId. + * @throws IOException if the block cannot be created. + */ + AbfsBlobBlock(AbfsOutputStream outputStream, long offset) throws IOException { + super(outputStream, offset); + this.blockId = generateBlockId(offset); + } + + /** + * Helper method that generates blockId. + * @param position The offset needed to generate blockId. + * @return String representing the block ID generated. + */ + private String generateBlockId(long position) { + String streamId = this.outputStream.getStreamID(); + String streamIdHash = Integer.toString(streamId.hashCode()); + String blockId = String.format("%d_%s", position, streamIdHash); + byte[] blockIdByteArray = new byte[BLOCK_ID_LENGTH]; + System.arraycopy(blockId.getBytes(StandardCharsets.UTF_8), 0, blockIdByteArray, 0, Math.min(BLOCK_ID_LENGTH, blockId.length())); + return new String(Base64.encodeBase64(blockIdByteArray), StandardCharsets.UTF_8); + } + + /** + * Returns blockId for the block. + * @return blockId.
+ */ + public String getBlockId() { + return blockId; + } +} + diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index a153e8b3a6d0b..8e0dbe69624b8 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -54,6 +54,7 @@ import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsInvalidChecksumException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidAbfsRestOperationException; @@ -71,12 +72,82 @@ import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import static java.net.HttpURLConnection.HTTP_NOT_FOUND; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ACQUIRE_LEASE_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_BLOB_TYPE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_BLOCK; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPLICATION_JSON; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPLICATION_OCTET_STREAM; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPLICATION_XML; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BLOCK; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BLOCKLIST; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BLOCK_BLOB_TYPE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BLOCK_TYPE_COMMITTED; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BREAK_LEASE_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COMMA; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CONTAINER; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DEFAULT_LEASE_BREAK_PERIOD; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.GET_ACCESS_CONTROL; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_DELETE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_GET; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_HEAD; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.LEASE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.LIST; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.METADATA; +import static 
org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.RELEASE_LEASE_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.RENEW_LEASE_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SINGLE_WHITE_SPACE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.STAR; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TRUE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.XML_TAG_BLOB_ERROR_CODE_END_XML; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.XML_TAG_BLOB_ERROR_CODE_START_XML; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.XML_TAG_BLOB_ERROR_MESSAGE_END_XML; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.XML_TAG_BLOB_ERROR_MESSAGE_START_XML; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.XML_TAG_BLOCK_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.XML_TAG_COMMITTED_BLOCKS; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.XML_TAG_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.XMS_PROPERTIES_ENCODING_ASCII; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.XMS_PROPERTIES_ENCODING_UNICODE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ZERO; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.ACCEPT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.CONTENT_LENGTH; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.CONTENT_TYPE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.IF_MATCH; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.IF_NONE_MATCH; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.RANGE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.USER_AGENT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_BLOB_TYPE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_COPY_SOURCE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_LEASE_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_LEASE_BREAK_PERIOD; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_LEASE_DURATION; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_LEASE_ID; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_METADATA_PREFIX; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_PROPOSED_LEASE_ID; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_META_HDI_ISFOLDER; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_RANGE_GET_CONTENT_MD5; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_SOURCE_LEASE_ID; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_BLOCKID; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_BLOCKLISTTYPE; +import 
static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_CLOSE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_COMP; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_DELIMITER; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_INCLUDE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_MARKER; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_MAX_RESULTS; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_PREFIX; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_RESTYPE; + import static java.net.HttpURLConnection.HTTP_CONFLICT; import static java.net.HttpURLConnection.HTTP_NOT_FOUND; import static java.net.HttpURLConnection.HTTP_OK; -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.*; -import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.*; -import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.*; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.PATH_EXISTS; /** * AbfsClient interacting with Blob endpoint. @@ -244,10 +315,66 @@ public AbfsRestOperation createPath(final String path, final boolean isAppendBlob, final String eTag, final ContextEncryptionAdapter contextEncryptionAdapter, - final TracingContext tracingContext) throws AzureBlobFileSystemException { + final TracingContext tracingContext, final boolean isNamespaceEnabled) + throws AzureBlobFileSystemException { + return createPath(path, isFile, overwrite, permissions, isAppendBlob, eTag, + contextEncryptionAdapter, tracingContext, isNamespaceEnabled, false); + } + /** + * Get Rest Operation for the Put Blob API. + * Creates a file or a directory (marker file) at the specified path. + * @param path path of the file or directory to be created. + * @param isFile whether the path is a file. + * @param overwrite whether an existing path should be overwritten. + * @param permissions permissions to set on the created path. + * @param isAppendBlob whether the blob should be created as an append blob. + * @param eTag the eTag to be matched for conditional requests. + * @param contextEncryptionAdapter the encryption adapter for context encryption. + * @param tracingContext the tracing context for the operation. + * @param isNamespaceEnabled whether the account has a hierarchical namespace. + * @param isCreateCalledFromMarkers whether this call originates from marker creation. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails.
+ */ + public AbfsRestOperation createPath(final String path, + final boolean isFile, + final boolean overwrite, + final AzureBlobFileSystemStore.Permissions permissions, + final boolean isAppendBlob, + final String eTag, + final ContextEncryptionAdapter contextEncryptionAdapter, + final TracingContext tracingContext, + final boolean isNamespaceEnabled, + boolean isCreateCalledFromMarkers) throws AzureBlobFileSystemException { final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(); + if (!isNamespaceEnabled && !isCreateCalledFromMarkers) { + Path parentPath = new Path(path).getParent(); + if (parentPath != null && !parentPath.isRoot()) { + createMarkers(parentPath, overwrite, permissions, isAppendBlob, eTag, + contextEncryptionAdapter, tracingContext, isNamespaceEnabled); + } + } + if (!isNamespaceEnabled && isFile) { + AbfsHttpOperation op1Result = null; + try { + op1Result = getPathStatus(path, tracingContext, + null, false).getResult(); + } catch (AbfsRestOperationException ex) { + if (ex.getStatusCode() == HTTP_NOT_FOUND) { + LOG.debug("No explicit directory/path found: {}", path); + } else { + throw ex; + } + } + if (op1Result != null && checkIsDir(op1Result)) { + throw new AbfsRestOperationException(HTTP_CONFLICT, + AzureServiceErrorCode.PATH_CONFLICT.getErrorCode(), + PATH_EXISTS, + null); + } + } + if (isFile) { + addEncryptionKeyRequestHeaders(path, requestHeaders, true, + contextEncryptionAdapter, tracingContext); + } requestHeaders.add(new AbfsHttpHeader(CONTENT_LENGTH, ZERO)); - requestHeaders.add(new AbfsHttpHeader(X_MS_BLOB_TYPE, BLOCK_BLOB_TYPE)); + if (isAppendBlob) { + requestHeaders.add(new AbfsHttpHeader(X_MS_BLOB_TYPE, APPEND_BLOB_TYPE)); + } else { + requestHeaders.add(new AbfsHttpHeader(X_MS_BLOB_TYPE, BLOCK_BLOB_TYPE)); + } if (!overwrite) { requestHeaders.add(new AbfsHttpHeader(IF_NONE_MATCH, AbfsHttpConstants.STAR)); } @@ -285,6 +412,136 @@ public AbfsRestOperation createPath(final String path, return op; } + /** + * Creates marker blobs for the parent directories of the specified path; a usage sketch follows this method. + * + * @param path The path for which parent directories need to be created. + * @param overwrite A flag indicating whether existing directories should be overwritten. + * @param permissions The permissions to be set for the created directories. + * @param isAppendBlob A flag indicating whether the created blob should be of type APPEND_BLOB. + * @param eTag The eTag to be matched for conditional requests. + * @param contextEncryptionAdapter The encryption adapter for context encryption. + * @param tracingContext The tracing context for the operation. + * @param isNamespaceEnabled Whether the account has a hierarchical namespace. + * @throws AzureBlobFileSystemException If the creation of any parent directory fails. + */ + public void createMarkers(final Path path, + final boolean overwrite, + final AzureBlobFileSystemStore.Permissions permissions, + final boolean isAppendBlob, + final String eTag, + final ContextEncryptionAdapter contextEncryptionAdapter, + final TracingContext tracingContext, final boolean isNamespaceEnabled) throws AzureBlobFileSystemException { + ArrayList<Path> keysToCreateAsFolder = new ArrayList<>(); + checkParentChainForFile(path, tracingContext, + keysToCreateAsFolder); + for (Path pathToCreate : keysToCreateAsFolder) { + createPath(pathToCreate.toUri().getPath(), false, overwrite, permissions, + isAppendBlob, eTag, contextEncryptionAdapter, tracingContext, isNamespaceEnabled, true); + } + }
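Editor's note: because a flat-namespace blob container has no real directories, `createPath` above materializes each missing ancestor as a zero-length marker blob before creating the child. A simplified model of that behavior (my own toy types, not the patch's code):

```java
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashSet;
import java.util.Set;

// Simplified model of marker creation on a flat namespace: every missing
// ancestor of "a/b/c/file" becomes an explicit zero-byte directory marker.
final class MarkerModel {
  final Set<String> directoryMarkers = new HashSet<>();

  void createFile(String path) {
    Deque<String> missing = new ArrayDeque<>();
    // Walk upwards until an existing marker (or the root) is found.
    for (String p = parent(path); p != null && !directoryMarkers.contains(p); p = parent(p)) {
      missing.push(p); // push so the topmost missing ancestor is created first
    }
    missing.forEach(directoryMarkers::add);
    // ... then create the file blob itself at `path`.
  }

  private static String parent(String path) {
    int i = path.lastIndexOf('/');
    return i <= 0 ? null : path.substring(0, i);
  }
}
```

This mirrors the stop-at-first-existing-directory walk performed by `checkParentChainForFile` below.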
+ + /** + * Checks the entire parent hierarchy: returns once an existing directory is found, and + * throws an exception if any ancestor exists as a file. + * @param path path to check the hierarchy for. + * @param tracingContext the tracing context. + * @param keysToCreateAsFolder list collecting the paths that need to be created as directory markers. + */ + private void checkParentChainForFile(Path path, TracingContext tracingContext, + List<Path> keysToCreateAsFolder) throws AzureBlobFileSystemException { + AbfsHttpOperation opResult = null; + try { + opResult = getPathStatus(path.toUri().getPath(), + tracingContext, null, false).getResult(); + } catch (AbfsRestOperationException ex) { + if (ex.getStatusCode() == HTTP_NOT_FOUND) { + LOG.debug("No explicit directory/path found: {}", path); + } else { + throw ex; + } + } + boolean isDirectory = opResult != null && checkIsDir(opResult); + if (opResult != null && !isDirectory) { + throw new AbfsRestOperationException(HTTP_CONFLICT, + AzureServiceErrorCode.PATH_CONFLICT.getErrorCode(), + PATH_EXISTS, + null); + } + if (isDirectory) { + return; + } + keysToCreateAsFolder.add(path); + Path current = path.getParent(); + while (current != null && !current.isRoot()) { + try { + opResult = getPathStatus(current.toUri().getPath(), + tracingContext, null, false).getResult(); + } catch (AbfsRestOperationException ex) { + if (ex.getStatusCode() == HTTP_NOT_FOUND) { + LOG.debug("No explicit directory/path found: {}", current); + } else { + throw ex; + } + } + isDirectory = opResult != null && checkIsDir(opResult); + if (opResult != null && !isDirectory) { + throw new AbfsRestOperationException(HTTP_CONFLICT, + AzureServiceErrorCode.PATH_CONFLICT.getErrorCode(), + PATH_EXISTS, + null); + } + if (isDirectory) { + return; + } + keysToCreateAsFolder.add(current); + current = current.getParent(); + } + } + + /** + * Appends a block to an append blob. + * API reference: <a href="https://learn.microsoft.com/en-us/rest/api/storageservices/append-block">Append Block</a>. + * + * @param path the path of the append blob. + * @param requestParameters parameters of the append block request. + * @param data the data to be appended. + * @param tracingContext the tracing context. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public AbfsRestOperation appendBlock(final String path, + AppendRequestParameters requestParameters, + final byte[] data, + final TracingContext tracingContext) throws AzureBlobFileSystemException { + final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(); + requestHeaders.add(new AbfsHttpHeader(CONTENT_LENGTH, String.valueOf(data.length))); + requestHeaders.add(new AbfsHttpHeader(X_MS_BLOB_TYPE, APPEND_BLOB_TYPE)); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_COMP, APPEND_BLOCK); + String sasTokenForReuse = appendSASTokenToQuery(path, SASTokenProvider.APPEND_BLOCK_OPERATION, abfsUriQueryBuilder); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.AppendBlock, + HTTP_METHOD_PUT, + url, + requestHeaders, + data, + requestParameters.getoffset(), + requestParameters.getLength(), + sasTokenForReuse); + + try { + op.execute(tracingContext); + } catch (AzureBlobFileSystemException ex) { + // If we have no HTTP response, throw the original exception. + if (!op.hasResult()) { + throw ex; + } + throw ex; + } + return op; + } + /** * Get Rest Operation for the List Blobs API. * @param relativePath to return only blobs with names that begin with the specified prefix.
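Editor's note: for append blobs, the ingress path routes each buffer through `appendBlock` (`comp=appendblock`) rather than the block/commit sequence, carrying offsets and the blob eTag in `AppendRequestParameters` (whose `setBlockId`/`setEtag` setters appear earlier in this diff). A paraphrased caller-side flow, using stand-in types rather than the real ABFS classes:

```java
// Paraphrased caller-side flow for an append-blob upload. The types here
// mirror (but are not) the ABFS ones; offsets and the eTag travel with the
// request parameters much as they do in AppendRequestParameters above.
final class AppendBlockSketch {
  static final class RequestParams {
    long position;      // file offset the block lands at
    int offset, length; // window into the local buffer
    String eTag;        // blob eTag for optimistic concurrency
  }

  interface BlobClient {
    void appendBlock(String path, RequestParams params, byte[] data);
  }

  static void upload(BlobClient client, String path, byte[] buffer,
                     long position, String eTag) {
    RequestParams params = new RequestParams();
    params.position = position;
    params.offset = 0;
    params.length = buffer.length;
    params.eTag = eTag;
    client.appendBlock(path, params, buffer); // PUT ...?comp=appendblock
  }
}
```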
@@ -319,7 +576,7 @@ public AbfsRestOperation listPath(final String relativePath, final boolean recur abfsUriQueryBuilder.addQuery(QUERY_PARAM_MARKER, continuation); } abfsUriQueryBuilder.addQuery(QUERY_PARAM_MAX_RESULTS, String.valueOf(listMaxResults)); - appendSASTokenToQuery(relativePath, SASTokenProvider.LIST_OPERATION, abfsUriQueryBuilder); + appendSASTokenToQuery(relativePath, SASTokenProvider.LIST_BLOB_OPERATION, abfsUriQueryBuilder); final URL url = createRequestUrl(abfsUriQueryBuilder.toString()); final AbfsRestOperation op = getAbfsRestOperation( @@ -517,6 +774,11 @@ public AbfsRestOperation read(final String path, requestHeaders.add(rangeHeader); requestHeaders.add(new AbfsHttpHeader(IF_MATCH, eTag)); + // Add request header to fetch MD5 Hash of data returned by server. + if (isChecksumValidationEnabled(requestHeaders, rangeHeader, bufferLength)) { + requestHeaders.add(new AbfsHttpHeader(X_MS_RANGE_GET_CONTENT_MD5, TRUE)); + } + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); String sasTokenForReuse = appendSASTokenToQuery(path, SASTokenProvider.READ_OPERATION, abfsUriQueryBuilder, cachedSasToken); @@ -529,6 +791,11 @@ public AbfsRestOperation read(final String path, sasTokenForReuse); op.execute(tracingContext); + // Verify that the MD5 hash returned by the server matches the data received. + if (isChecksumValidationEnabled(requestHeaders, rangeHeader, bufferLength)) { + verifyCheckSumForRead(buffer, op.getResult(), bufferOffset); + } + return op; } @@ -552,6 +819,8 @@ public AbfsRestOperation append(final String path, final ContextEncryptionAdapter contextEncryptionAdapter, final TracingContext tracingContext) throws AzureBlobFileSystemException { final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(); + addEncryptionKeyRequestHeaders(path, requestHeaders, false, + contextEncryptionAdapter, tracingContext); requestHeaders.add(new AbfsHttpHeader(CONTENT_LENGTH, String.valueOf(buffer.length))); requestHeaders.add(new AbfsHttpHeader(IF_MATCH, reqParams.getETag())); if (reqParams.getLeaseId() != null) { @@ -561,6 +830,10 @@ public AbfsRestOperation append(final String path, requestHeaders.add(new AbfsHttpHeader(EXPECT, HUNDRED_CONTINUE)); } + if (isChecksumValidationEnabled()) { + addCheckSumHeaderForWrite(requestHeaders, reqParams, buffer); + } + if (reqParams.isRetryDueToExpect()) { String userAgentRetry = userAgent; userAgentRetry = userAgentRetry.replace(HUNDRED_CONTINUE_USER_AGENT, EMPTY_STRING); @@ -584,7 +857,7 @@ public AbfsRestOperation append(final String path, try { op.execute(tracingContext); - } catch (AzureBlobFileSystemException e) { + } catch (AbfsRestOperationException e) { /* If the http response code indicates a user error we retry the same append request with expect header being disabled. if someone has taken dependency on the exception message, which is created using the error string present in the response header.
*/ - int responseStatusCode = ((AbfsRestOperationException) e).getStatusCode(); + int responseStatusCode = e.getStatusCode(); if (checkUserError(responseStatusCode) && reqParams.isExpectHeaderEnabled()) { LOG.debug("User error, retrying without 100 continue enabled for the given path {}", path); reqParams.setExpectHeaderEnabled(false); @@ -602,15 +875,29 @@ return this.append(path, buffer, reqParams, cachedSasToken, contextEncryptionAdapter, tracingContext); } - else { + // If we have no HTTP response, throw the original exception. + if (!op.hasResult()) { throw e; } + + if (isMd5ChecksumError(e)) { + throw new AbfsInvalidChecksumException(e); + } + + throw e; + } + + catch (AzureBlobFileSystemException e) { + // Any server side issue will be returned as AbfsRestOperationException and will be handled above. + LOG.debug("Append request failed with non-server issues for path: {}, offset: {}, position: {}", + path, reqParams.getoffset(), reqParams.getPosition()); + throw e; } return op; } /** - * Redirect to flush specific to blob endpoint + * Redirect to flush specific to blob endpoint. */ @Override public AbfsRestOperation flush(final String path, @@ -621,7 +908,7 @@ public AbfsRestOperation flush(final String path, final String leaseId, final ContextEncryptionAdapter contextEncryptionAdapter, final TracingContext tracingContext) throws AzureBlobFileSystemException { - return this.flush(null, path, isClose, cachedSasToken, leaseId, null, + return this.flush(null, path, isClose, cachedSasToken, leaseId, null, contextEncryptionAdapter, tracingContext); } @@ -645,8 +932,11 @@ public AbfsRestOperation flush(byte[] buffer, final String cachedSasToken, final String leaseId, final String eTag, + ContextEncryptionAdapter contextEncryptionAdapter, final TracingContext tracingContext) throws AzureBlobFileSystemException { final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(); + addEncryptionKeyRequestHeaders(path, requestHeaders, false, + contextEncryptionAdapter, tracingContext); requestHeaders.add(new AbfsHttpHeader(CONTENT_LENGTH, String.valueOf(buffer.length))); requestHeaders.add(new AbfsHttpHeader(CONTENT_TYPE, APPLICATION_XML)); requestHeaders.add(new AbfsHttpHeader(IF_MATCH, eTag)); @@ -721,7 +1011,7 @@ public AbfsRestOperation getPathStatus(final String path, final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.GetPathStatus, + AbfsRestOperationType.GetBlobProperties, HTTP_METHOD_HEAD, url, requestHeaders); try { op.execute(tracingContext);
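Editor's note: the checksum hooks added in the blob client above (`addCheckSumHeaderForWrite`, `verifyCheckSumForRead`) follow the standard Content-MD5 contract: send an MD5 of the exact byte range with the write, and compare a locally computed MD5 against the server-returned value on ranged reads. A minimal sketch of both halves (helper names are mine, not the patch's):

```java
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Base64;

final class Md5Sketch {
  // Write side: MD5 over exactly the slice being sent, Base64-encoded
  // as required by the Content-MD5 header.
  static String md5ForWrite(byte[] buffer, int offset, int length)
      throws NoSuchAlgorithmException {
    MessageDigest md5 = MessageDigest.getInstance("MD5");
    md5.update(buffer, offset, length);
    return Base64.getEncoder().encodeToString(md5.digest());
  }

  // Read side: recompute over the received bytes and compare with the
  // MD5 the server returned in the response headers for the range.
  static boolean verifyRead(byte[] received, int offset, int length,
      String serverMd5Base64) throws NoSuchAlgorithmException {
    return md5ForWrite(received, offset, length).equals(serverMd5Base64);
  }
}
```

A mismatch on the write path surfaces as the `AbfsInvalidChecksumException` thrown above when `isMd5ChecksumError(e)` is true.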
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlock.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlock.java new file mode 100644 index 0000000000000..a4499baca639b --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlock.java @@ -0,0 +1,123 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.Closeable; +import java.io.IOException; + +import org.apache.hadoop.fs.store.DataBlocks; + +/** + * Wraps an active {@link DataBlocks.DataBlock} and tracks the offset at which it starts within the stream. + */ +public class AbfsBlock implements Closeable { + + private final DataBlocks.DataBlock activeBlock; + protected AbfsOutputStream outputStream; + private final long offset; + + /** + * Creates an active block bound to the given output stream at the given offset. + * @param outputStream AbfsOutputStream instance. + * @param offset offset of the block within the stream. + * @throws IOException if the data block cannot be created. + */ + AbfsBlock(AbfsOutputStream outputStream, long offset) throws IOException { + this.outputStream = outputStream; + this.offset = offset; + DataBlocks.BlockFactory blockFactory = outputStream.getBlockManager().getBlockFactory(); + long blockCount = outputStream.getBlockManager().getBlockCount(); + int blockSize = outputStream.getBlockManager().getBlockSize(); + AbfsOutputStreamStatistics outputStreamStatistics = outputStream.getOutputStreamStatistics(); + this.activeBlock = blockFactory.create(blockCount, blockSize, outputStreamStatistics); + } + + /** + * Returns activeBlock. + * @return activeBlock. + */ + public DataBlocks.DataBlock getActiveBlock() { + return activeBlock; + } + + /** + * Returns the data size for the block. + * @return data size. + */ + public int dataSize() { + return activeBlock.dataSize(); + } + + /** + * Return instance of BlockUploadData. + * @return instance of BlockUploadData. + * @throws IOException if the upload data cannot be prepared. + */ + public DataBlocks.BlockUploadData startUpload() throws IOException { + return activeBlock.startUpload(); + } + + /** + * Returns whether the block has data. + * @return block has data or not. + */ + public boolean hasData() { + return activeBlock.hasData(); + } + + /** + * Write a series of bytes from the buffer, from the offset. Returns the number of bytes written. + * Only valid in the state Writing. Base class verifies the state but does no writing. + * @param buffer buffer. + * @param offset offset. + * @param length length. + * @return number of bytes written. + * @throws IOException if the write fails. + */ + public int write(byte[] buffer, int offset, int length) throws IOException { + return activeBlock.write(buffer, offset, length); + } + + /** + * Returns remainingCapacity. + * @return remainingCapacity. + */ + public int remainingCapacity() { + return activeBlock.remainingCapacity(); + } + + public Long getOffset() { + return offset; + } + + @Override + public void close() throws IOException { + if (activeBlock != null) { + activeBlock.close(); + } + } + + /** + * Returns blockId for the block. + * @return blockId. + */ + public String getBlockId() { + throw new IllegalArgumentException("DFS client does not support blockId"); + } +}
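Editor's note on `AbfsBlobBlock.generateBlockId` earlier in this diff: the raw id `"<offset>_<streamIdHash>"` is copied into a fixed 60-byte array before Base64 encoding, so every block id of a stream has the same encoded length; the Blob service requires all block ids within one blob to be of equal length. A standalone repro of that shape (constant inlined from FileSystemConfigurations; `java.util.Base64` stands in for the commons-codec encoder):

```java
import java.nio.charset.StandardCharsets;
import java.util.Base64;

final class BlockIdSketch {
  static final int BLOCK_ID_LENGTH = 60; // FileSystemConfigurations.BLOCK_ID_LENGTH

  // Same shape as AbfsBlobBlock.generateBlockId: deterministic per
  // (offset, stream id), zero-padded to a fixed width so that all
  // encoded block ids of a blob have equal length.
  static String blockId(long offset, String streamId) {
    String raw = String.format("%d_%s", offset, Integer.toString(streamId.hashCode()));
    byte[] fixed = new byte[BLOCK_ID_LENGTH];
    byte[] rawBytes = raw.getBytes(StandardCharsets.UTF_8);
    System.arraycopy(rawBytes, 0, fixed, 0, Math.min(BLOCK_ID_LENGTH, rawBytes.length));
    return Base64.getEncoder().encodeToString(fixed);
  }

  public static void main(String[] args) {
    // Two blocks of one stream: same encoded length, different ids.
    System.out.println(blockId(0, "stream-1"));
    System.out.println(blockId(4194304, "stream-1"));
  }
}
```

Determinism matters here: a retried upload of the same offset regenerates the same block id, so a duplicate Put Block is harmless until the block list is committed.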
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlockStatus.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlockStatus.java new file mode 100644 index 0000000000000..dc04df33fb71a --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlockStatus.java @@ -0,0 +1,25 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + *
    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +public enum AbfsBlockStatus { + NEW, + SUCCESS, + FAILED +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 2e968a7fe1f71..38d9c68b639f2 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -43,6 +43,8 @@ import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsInvalidChecksumException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsDriverException; @@ -107,6 +109,7 @@ public abstract class AbfsClient implements Closeable { private final URL baseUrl; private final SharedKeyCredentials sharedKeyCredentials; + // TODO: Abstract for blob and dfs for CPK in OSS PR protected ApiVersion xMsVersion = ApiVersion.getCurrentVersion(); private final ExponentialRetryPolicy exponentialRetryPolicy; private final StaticRetryPolicy staticRetryPolicy; @@ -164,7 +167,6 @@ private AbfsClient(final URL baseUrl, if (encryptionContextProvider != null) { this.encryptionContextProvider = encryptionContextProvider; - xMsVersion = ApiVersion.APR_10_2021; // will be default once server change deployed encryptionType = EncryptionType.ENCRYPTION_CONTEXT; } else if (abfsConfiguration.getEncodedClientProvidedEncryptionKey() != null) { clientProvidedEncryptionKey = @@ -447,7 +449,7 @@ public abstract AbfsRestOperation createPath(final String path, final boolean isAppendBlob, final String eTag, final ContextEncryptionAdapter contextEncryptionAdapter, - final TracingContext tracingContext) throws AzureBlobFileSystemException; + final TracingContext tracingContext, boolean isNamespaceEnabled) throws AzureBlobFileSystemException; public abstract AbfsRestOperation acquireLease(final String path, final int duration, @@ -639,6 +641,7 @@ public abstract AbfsRestOperation flush(byte[] buffer, final String cachedSasToken, final String leaseId, final String eTag, + ContextEncryptionAdapter contextEncryptionAdapter, final TracingContext tracingContext) throws AzureBlobFileSystemException; public abstract AbfsRestOperation setPathProperties(final String path, diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index 0eac4dee10e4c..c0b0ca3cd22eb 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -40,6 +40,7 @@ import com.fasterxml.jackson.core.JsonToken; import com.fasterxml.jackson.databind.ObjectMapper; +import 
org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; @@ -281,7 +282,7 @@ public AbfsRestOperation createPath(final String path, final boolean isAppendBlob, final String eTag, final ContextEncryptionAdapter contextEncryptionAdapter, - final TracingContext tracingContext) + final TracingContext tracingContext, final boolean isNamespaceEnabled) throws AzureBlobFileSystemException { final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(); if (isFile) { @@ -341,6 +342,7 @@ public AbfsRestOperation createPath(final String path, return op; } + + /** * Get Rest Operation for the Path Lease API. * Acquire lease on specified path. @@ -744,6 +746,7 @@ public AbfsRestOperation flush(byte[] buffer, final String cachedSasToken, final String leaseId, final String eTag, + ContextEncryptionAdapter contextEncryptionAdapter, final TracingContext tracingContext) throws AzureBlobFileSystemException { throw new UnsupportedOperationException( "flush with blockIds not supported on DFS Endpoint"); @@ -1145,7 +1148,8 @@ public String getContinuationFromResponse(AbfsHttpOperation result) { @Override public boolean checkUserError(int responseStatusCode) { return (responseStatusCode >= HttpURLConnection.HTTP_BAD_REQUEST - && responseStatusCode < HttpURLConnection.HTTP_INTERNAL_ERROR); + && responseStatusCode < HttpURLConnection.HTTP_INTERNAL_ERROR + && responseStatusCode != HttpURLConnection.HTTP_CONFLICT); } /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsErrors.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsErrors.java index 86285e08f2ce3..e78d2a5702b49 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsErrors.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsErrors.java @@ -29,10 +29,12 @@ @InterfaceStability.Evolving public final class AbfsErrors { public static final String ERR_WRITE_WITHOUT_LEASE = "Attempted to write to file without lease"; - public static final String ERR_LEASE_EXPIRED = "A lease ID was specified, but the lease for the" + " resource has expired"; + public static final String ERR_LEASE_EXPIRED = "A lease ID was specified, but the lease for the resource has expired."; + public static final String ERR_LEASE_EXPIRED_BLOB = "A lease ID was specified, but the lease for the blob has expired."; public static final String ERR_NO_LEASE_ID_SPECIFIED = "There is currently a lease on the " + "resource and no lease ID was specified in the request"; + public static final String ERR_NO_LEASE_ID_SPECIFIED_BLOB = "There is currently a lease on the " + + "blob and no lease ID was specified in the request"; public static final String ERR_PARALLEL_ACCESS_DETECTED = "Parallel access to the create path " + "detected.
Failing request to honor single writer semantics"; public static final String ERR_ACQUIRING_LEASE = "Unable to acquire lease"; @@ -49,5 +51,12 @@ public final class AbfsErrors { public static final String ERR_NO_LEASE_THREADS = "Lease desired but no lease threads " + "configured, set " + FS_AZURE_LEASE_THREADS; public static final String ERR_CREATE_ON_ROOT = "Cannot create file over root path"; + public static final String PATH_EXISTS = "The specified path, or an element of the path, " + + "exists and its resource type is invalid for this operation."; + public static final String BLOB_OPERATION_NOT_SUPPORTED = "Blob operation is not supported."; + public static final String INVALID_APPEND_OPERATION = "The resource was created or modified by the Azure Blob Service API " + + "and cannot be appended to by the Azure Data Lake Storage Service API"; + public static final String CONDITION_NOT_MET = "The condition specified using " + + "HTTP conditional header(s) is not met."; private AbfsErrors() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 4a509a8184b1e..769d849fe93a6 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -568,6 +568,10 @@ private void parseBlockListResponse(final InputStream stream) throws IOException blockIdList = client.parseBlockListResponse(stream); } + public List<String> getBlockIdList() { + return blockIdList; + } + /** * Check null stream, this is to pass findbugs's redundant check for NULL * @param stream InputStream @@ -615,6 +619,7 @@ Integer getConnResponseCode() throws IOException { * @return output stream. * @throws IOException */ + @VisibleForTesting OutputStream getConnOutputStream() throws IOException { return connection.getOutputStream(); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java index 092ebf4506b51..2f65f8ea62d00 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java @@ -25,7 +25,12 @@ import java.util.concurrent.ConcurrentLinkedDeque; import java.util.concurrent.Future; import java.util.UUID; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; +import org.apache.hadoop.fs.azurebfs.constants.AbfsServiceType; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidIngressServiceException; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.impl.BackReference; @@ -68,7 +73,7 @@ public class AbfsOutputStream extends OutputStream implements Syncable, StreamCapabilities, IOStatisticsSource { - private final AbfsClient client; + private AbfsClient client; private final String path; /** The position in the file being uploaded, where the next block would be * uploaded.
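Editor's note: the new `getBlockIdList()` accessor above exposes the parsed result of a Get Block List call. One natural use, sketched here but not shown in the patch, is deciding whether a block with a deterministic id was already committed before re-sending it after an ambiguous failure:

```java
import java.util.List;

// Hypothetical retry helper: after an ambiguous failure, consult the
// committed block list (as surfaced by getBlockIdList()) before
// re-uploading a block that carries the same deterministic id.
final class CommittedBlockCheck {
  static boolean alreadyCommitted(List<String> committedBlockIds, String blockId) {
    return committedBlockIds.contains(blockId);
  }
}
```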
@@ -117,34 +122,43 @@ public class AbfsOutputStream extends OutputStream implements Syncable, /** Factory for blocks. */ private final DataBlocks.BlockFactory blockFactory; - /** Current data block. Null means none currently active. */ - private DataBlocks.DataBlock activeBlock; - - /** Count of blocks uploaded. */ - private long blockCount = 0; - - /** The size of a single block. */ - private final int blockSize; - /** Executor service to carry out the parallel upload requests. */ private final ListeningExecutorService executorService; + /** The etag of the blob. */ + private String eTag; + /** ABFS instance to be held by the output stream to avoid GC close. */ private final BackReference fsBackRef; + /** The service type at initialization. */ + private final AbfsServiceType serviceTypeAtInit; + + /** Indicates whether DFS to Blob fallback is enabled. */ + private final boolean isDFSToBlobFallbackEnabled; + + /** The current executing service type. */ + private AbfsServiceType currentExecutingServiceType; + + /** The handler for managing Azure ingress, marked as volatile to ensure visibility across threads. */ + private volatile AzureIngressHandler ingressHandler; + + /** The handler for managing Abfs client operations. */ + private final AbfsClientHandler clientHandler; + public AbfsOutputStream(AbfsOutputStreamContext abfsOutputStreamContext) throws IOException { - this.client = abfsOutputStreamContext.getClient(); this.statistics = abfsOutputStreamContext.getStatistics(); this.path = abfsOutputStreamContext.getPath(); this.position = abfsOutputStreamContext.getPosition(); this.closed = false; this.supportFlush = abfsOutputStreamContext.isEnableFlush(); - this.isExpectHeaderEnabled = abfsOutputStreamContext.isExpectHeaderEnabled(); + this.isExpectHeaderEnabled + = abfsOutputStreamContext.isExpectHeaderEnabled(); this.disableOutputStreamFlush = abfsOutputStreamContext - .isDisableOutputStreamFlush(); + .isDisableOutputStreamFlush(); this.enableSmallWriteOptimization - = abfsOutputStreamContext.isEnableSmallWriteOptimization(); + = abfsOutputStreamContext.isSmallWriteSupported(); this.isAppendBlob = abfsOutputStreamContext.isAppendBlob(); this.lastError = null; this.lastFlushOffset = 0; @@ -154,7 +168,9 @@ public AbfsOutputStream(AbfsOutputStreamContext abfsOutputStreamContext) this.writeOperations = new ConcurrentLinkedDeque<>(); this.outputStreamStatistics = abfsOutputStreamContext.getStreamStatistics(); this.fsBackRef = abfsOutputStreamContext.getFsBackRef(); - this.contextEncryptionAdapter = abfsOutputStreamContext.getEncryptionAdapter(); + this.contextEncryptionAdapter + = abfsOutputStreamContext.getEncryptionAdapter(); + this.eTag = abfsOutputStreamContext.getETag(); if (this.isAppendBlob) { this.maxConcurrentRequestCount = 1; @@ -168,25 +184,171 @@ public AbfsOutputStream(AbfsOutputStreamContext abfsOutputStreamContext) this.lease = abfsOutputStreamContext.getLease(); this.leaseId = abfsOutputStreamContext.getLeaseId(); this.executorService = - MoreExecutors.listeningDecorator(abfsOutputStreamContext.getExecutorService()); + MoreExecutors.listeningDecorator( + abfsOutputStreamContext.getExecutorService()); this.cachedSasToken = new CachedSASToken( abfsOutputStreamContext.getSasTokenRenewPeriodForStreamsInSeconds()); this.outputStreamId = createOutputStreamId(); - this.tracingContext = new TracingContext(abfsOutputStreamContext.getTracingContext()); + this.tracingContext = new TracingContext( + abfsOutputStreamContext.getTracingContext()); 
this.tracingContext.setStreamID(outputStreamId); this.tracingContext.setOperation(FSOperationType.WRITE); this.ioStatistics = outputStreamStatistics.getIOStatistics(); this.blockFactory = abfsOutputStreamContext.getBlockFactory(); - this.blockSize = bufferSize; + this.isDFSToBlobFallbackEnabled + = abfsOutputStreamContext.isDFSToBlobFallbackEnabled(); // create that first block. This guarantees that an open + close sequence // writes a 0-byte entry. - createBlockIfNeeded(); + this.serviceTypeAtInit = this.currentExecutingServiceType = + abfsOutputStreamContext.getIngressServiceType(); + this.clientHandler = abfsOutputStreamContext.getClientHandler(); + createIngressHandler(serviceTypeAtInit, + abfsOutputStreamContext.getBlockFactory(), bufferSize, false, null); + createBlockIfNeeded(position); + } + + /** + * Retrieves the current ingress handler. + * + * @return the current {@link AzureIngressHandler}. + */ + public AzureIngressHandler getIngressHandler() { + return ingressHandler; + } + + private final Lock lock = new ReentrantLock(); + + private volatile boolean switchCompleted = false; + + /** + * Creates an ingress handler based on the provided service type and other parameters. + * + * @param serviceType the type of service, either BLOB or DFS. + * @param blockFactory the factory to create data blocks. + * @param bufferSize the buffer size for the ingress handler. + * @return the created {@link AzureIngressHandler}. + * @throws IOException if there is an error creating the ingress handler. + */ + private AzureIngressHandler createIngressHandler(AbfsServiceType serviceType, + DataBlocks.BlockFactory blockFactory, + int bufferSize, boolean isSwitch, AzureBlockManager blockManager) throws IOException { + lock.lock(); + try { + this.client = clientHandler.getClient(serviceType); + if (switchCompleted && ingressHandler != null) { + return ingressHandler; // Return the existing ingress handler + } + if (isDFSToBlobFallbackEnabled + && serviceTypeAtInit == AbfsServiceType.BLOB) { + throw new InvalidConfigurationValueException( + "The ingress service type must be configured as DFS"); + } + if (isDFSToBlobFallbackEnabled && !isSwitch) { + ingressHandler = new AzureDfsToBlobIngressFallbackHandler(this, + blockFactory, + bufferSize, eTag, clientHandler); + } else if (serviceType == AbfsServiceType.BLOB) { + ingressHandler = new AzureBlobIngressHandler(this, blockFactory, + bufferSize, eTag, clientHandler, blockManager); + } else { + ingressHandler = new AzureDFSIngressHandler(this, blockFactory, + bufferSize, eTag, clientHandler); + } + if (isSwitch) { + switchCompleted = true; + } + return ingressHandler; + } finally { + lock.unlock(); + } } + /** + * Switches the current ingress handler and service type if necessary. + * + * @throws IOException if there is an error creating the new ingress handler. + */ + protected void switchHandler() throws IOException { + if (serviceTypeAtInit != currentExecutingServiceType) { + return; + } + if (serviceTypeAtInit == AbfsServiceType.BLOB) { + currentExecutingServiceType = AbfsServiceType.DFS; + } else { + currentExecutingServiceType = AbfsServiceType.BLOB; + } + ingressHandler = createIngressHandler(currentExecutingServiceType, + blockFactory, bufferSize, true, getBlockManager()); + } + + /** + * Buffers data in the given block. + * + * @param block the block to buffer data into. + * @param data the data to buffer. + * @param off the offset in the data array. + * @param length the length of data to buffer. + * @return the number of bytes buffered. 
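
As an aside, a standalone sketch of the selection rule that createIngressHandler applies above; ServiceType, the handler names, and pickHandler are local stand-ins for the real ABFS types, not part of this patch.

    // The fallback handler wins whenever DFS-to-Blob fallback is enabled and
    // this is not a post-failure switch; otherwise the service type decides
    // between the Blob and DFS ingress handlers.
    enum ServiceType { DFS, BLOB }

    final class IngressSelection {
      static String pickHandler(boolean fallbackEnabled, boolean isSwitch,
          ServiceType serviceType) {
        if (fallbackEnabled && !isSwitch) {
          return "AzureDfsToBlobIngressFallbackHandler";
        }
        return serviceType == ServiceType.BLOB
            ? "AzureBlobIngressHandler"
            : "AzureDFSIngressHandler";
      }

      public static void main(String[] args) {
        System.out.println(pickHandler(true, false, ServiceType.DFS)); // fallback handler
        System.out.println(pickHandler(true, true, ServiceType.BLOB)); // blob handler after switch
      }
    }
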
+ * @throws IOException if there is an error buffering the data. + */ + private int bufferData(AbfsBlock block, + final byte[] data, + final int off, + final int length) + throws IOException { + return getIngressHandler().bufferData(block, data, off, length); + } + + /** + * Performs a remote write operation. + * + * @param blockToUpload the block to upload. + * @param uploadData the data to upload. + * @param reqParams the parameters for the append request. + * @param tracingContext the tracing context for the operation. + * @return the result of the remote write operation. + * @throws IOException if there is an error during the remote write. + */ + private AbfsRestOperation remoteWrite(AbfsBlock blockToUpload, + DataBlocks.BlockUploadData uploadData, + AppendRequestParameters reqParams, + TracingContext tracingContext) + throws IOException { + return getIngressHandler().remoteWrite(blockToUpload, uploadData, reqParams, + tracingContext); + } + + /** + * Flushes data remotely. + * + * @param offset the offset to flush. + * @param retainUncommitedData whether to retain uncommitted data. + * @param isClose whether this is a close operation. + * @param leaseId the lease ID for the operation. + * @param tracingContext the tracing context for the operation. + * @return the result of the remote flush operation. + * @throws IOException if there is an error during the remote flush. + */ + private AbfsRestOperation remoteFlush(final long offset, + final boolean retainUncommitedData, + final boolean isClose, + final String leaseId, + TracingContext tracingContext) + throws IOException { + return getIngressHandler().remoteFlush(offset, retainUncommitedData, + isClose, leaseId, tracingContext); + } + + /** + * Creates a new output stream ID. + * + * @return the newly created output stream ID. + */ private String createOutputStreamId() { return StringUtils.right(UUID.randomUUID().toString(), STREAM_ID_LEN); } + /** * Query the stream for a specific capability. * @@ -224,8 +386,14 @@ public void write(final int byteVal) throws IOException { * thrown if the output stream has been closed. */ @Override - public synchronized void write(final byte[] data, final int off, final int length) + public synchronized void write(final byte[] data, + final int off, + final int length) throws IOException { + if (closed) { + throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); + } + // validate if data is not null and index out of bounds. DataBlocks.validateWriteArgs(data, off, length); maybeThrowLastError(); @@ -237,8 +405,13 @@ public synchronized void write(final byte[] data, final int off, final int lengt if (hasLease() && isLeaseFreed()) { throw new PathIOException(path, ERR_WRITE_WITHOUT_LEASE); } - DataBlocks.DataBlock block = createBlockIfNeeded(); - int written = block.write(data, off, length); + + if (length == 0) { + return; + } + + AbfsBlock block = createBlockIfNeeded(position); + int written = bufferData(block, data, off, length); int remainingCapacity = block.remainingCapacity(); if (written < length) { @@ -264,14 +437,9 @@ public synchronized void write(final byte[] data, final int off, final int lengt * @return the active block; null if there isn't one. 
* @throws IOException on any failure to create */ - private synchronized DataBlocks.DataBlock createBlockIfNeeded() + private synchronized AbfsBlock createBlockIfNeeded(long position) throws IOException { - if (activeBlock == null) { - blockCount++; - activeBlock = blockFactory - .create(blockCount, this.blockSize, outputStreamStatistics); - } - return activeBlock; + return getBlockManager().createBlock(position); } /** @@ -281,13 +449,15 @@ private synchronized DataBlocks.DataBlock createBlockIfNeeded() * initializing the upload, or if a previous operation has failed. */ private synchronized void uploadCurrentBlock() throws IOException { - checkState(hasActiveBlock(), "No active block"); - LOG.debug("Writing block # {}", blockCount); + checkState(getBlockManager().hasActiveBlock(), + "No active block"); + LOG.debug("Writing block # {}", getBlockManager().getBlockCount()); try { - uploadBlockAsync(getActiveBlock(), false, false); + uploadBlockAsync(getBlockManager().getActiveBlock(), + false, false); } finally { // set the block to null, so the next write will create a new block. - clearActiveBlock(); + getBlockManager().clearActiveBlock(); } } @@ -298,11 +468,11 @@ private synchronized void uploadCurrentBlock() throws IOException { * @param blockToUpload block to upload. * @throws IOException upload failure */ - private void uploadBlockAsync(DataBlocks.DataBlock blockToUpload, + private void uploadBlockAsync(AbfsBlock blockToUpload, boolean isFlush, boolean isClose) throws IOException { if (this.isAppendBlob) { - writeAppendBlobCurrentBufferToService(); + getIngressHandler().writeAppendBlobCurrentBufferToService(); return; } if (!blockToUpload.hasData()) { @@ -337,10 +507,19 @@ private void uploadBlockAsync(DataBlocks.DataBlock blockToUpload, * leaseId - The AbfsLeaseId for this request. */ AppendRequestParameters reqParams = new AppendRequestParameters( - offset, 0, bytesLength, mode, false, leaseId, isExpectHeaderEnabled); - AbfsRestOperation op = getClient().append(path, - blockUploadData.toByteArray(), reqParams, cachedSasToken.get(), - contextEncryptionAdapter, new TracingContext(tracingContext)); + offset, 0, bytesLength, mode, false, leaseId, + isExpectHeaderEnabled); + AbfsRestOperation op; + try { + op = remoteWrite(blockToUpload, blockUploadData, reqParams, + tracingContext); + } catch (InvalidIngressServiceException ex) { + switchHandler(); + // retry the operation with switched handler. + op = remoteWrite(blockToUpload, blockUploadData, + reqParams, tracingContext); + } + cachedSasToken.update(op.getSasToken()); perfInfo.registerResult(op.getResult()); perfInfo.registerSuccess(true); @@ -361,7 +540,7 @@ private void uploadBlockAsync(DataBlocks.DataBlock blockToUpload, * @param ex Exception caught. * @throws IOException Throws the lastError. */ - private void failureWhileSubmit(Exception ex) throws IOException { + void failureWhileSubmit(Exception ex) throws IOException { if (ex instanceof AbfsRestOperationException) { if (((AbfsRestOperationException) ex).getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND) { @@ -377,42 +556,14 @@ private void failureWhileSubmit(Exception ex) throws IOException { } /** - * Synchronized accessor to the active block. - * - * @return the active block; null if there isn't one. - */ - private synchronized DataBlocks.DataBlock getActiveBlock() { - return activeBlock; - } - - /** - * Predicate to query whether or not there is an active block. - * - * @return true if there is an active block. 
- */ - private synchronized boolean hasActiveBlock() { - return activeBlock != null; - } - - /** - * Is there an active block and is there any data in it to upload? + * Is there an active block and is there any data in it to upload. * * @return true if there is some data to upload in an active block else false. */ - private boolean hasActiveBlockDataToUpload() { - return hasActiveBlock() && getActiveBlock().hasData(); - } - - /** - * Clear the active block. - */ - private void clearActiveBlock() { - if (activeBlock != null) { - LOG.debug("Clearing active block"); - } - synchronized (this) { - activeBlock = null; - } + boolean hasActiveBlockDataToUpload() { + AzureBlockManager blockManager = getBlockManager(); + AbfsBlock activeBlock = blockManager.getActiveBlock(); + return blockManager.hasActiveBlock() && activeBlock.hasData(); } /** @@ -472,15 +623,26 @@ public void hflush() throws IOException { } } + /** + * Retrieves the stream ID associated with this output stream. + * + * @return the stream ID of this output stream. + */ public String getStreamID() { return outputStreamId; } + /** + * Registers a listener for this output stream. + * + * @param listener1 the listener to register. + */ public void registerListener(Listener listener1) { listener = listener1; tracingContext.setListener(listener); } + /** * Force all data in the output stream to be written to Azure storage. * Wait to return until this is complete. Close the access to the stream and @@ -522,21 +684,31 @@ public synchronized void close() throws IOException { bufferIndex = 0; closed = true; writeOperations.clear(); - if (hasActiveBlock()) { - clearActiveBlock(); - } + getBlockManager().clearActiveBlock(); } LOG.debug("Closing AbfsOutputStream : {}", this); } + /** + * Flushes the buffered data to the Azure Blob Storage service. + * This method checks if a small write optimization can be applied, and if so, delegates + * the flush operation to {@link #smallWriteOptimizedflushInternal(boolean)}. + * Otherwise, it uploads the active block synchronously, flushes the written bytes to + * the service, and resets the number of appends to the server since the last flush. + * + * @param isClose indicates whether this flush operation is part of a close operation. + * @throws IOException if an I/O error occurs during the flush operation. + */ private synchronized void flushInternal(boolean isClose) throws IOException { maybeThrowLastError(); // if its a flush post write < buffersize, send flush parameter in append if (!isAppendBlob && enableSmallWriteOptimization - && (numOfAppendsToServerSinceLastFlush == 0) // there are no ongoing store writes - && (writeOperations.size() == 0) // double checking no appends in progress + && (numOfAppendsToServerSinceLastFlush == 0) + // there are no ongoing store writes + && (writeOperations.size() == 0) + // double checking no appends in progress && hasActiveBlockDataToUpload()) { // there is // some data that is pending to be written smallWriteOptimizedflushInternal(isClose); @@ -550,17 +722,37 @@ && hasActiveBlockDataToUpload()) { // there is numOfAppendsToServerSinceLastFlush = 0; } - private synchronized void smallWriteOptimizedflushInternal(boolean isClose) throws IOException { + + /** + * Flushes the buffered data to the Azure Blob Storage service with small write optimization. 
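
A compact restatement, with assumed local names and not part of this patch, of the guard that flushInternal evaluates below: the small-write path may combine the final append with the flush only when nothing else is pending.

    final class SmallWriteGate {
      // Mirrors the flushInternal condition: not an append blob, optimization
      // enabled, no appends since the last flush, no in-flight writes, and
      // buffered data waiting in the active block.
      static boolean canUseSmallWriteFlush(boolean isAppendBlob,
          boolean optimizationEnabled, int appendsSinceLastFlush,
          int inFlightWrites, boolean activeBlockHasData) {
        return !isAppendBlob
            && optimizationEnabled
            && appendsSinceLastFlush == 0
            && inFlightWrites == 0
            && activeBlockHasData;
      }

      public static void main(String[] args) {
        System.out.println(canUseSmallWriteFlush(false, true, 0, 0, true)); // true
        System.out.println(canUseSmallWriteFlush(false, true, 1, 0, true)); // false: append pending
      }
    }
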
+ * This method uploads the active block asynchronously, waits for appends to complete, shrinks + * the write operation queue, checks for any previous errors, and resets the number of appends + * to the server since the last flush. + * + * @param isClose indicates whether this flush operation is part of a close operation. + * @throws IOException if an I/O error occurs during the flush operation. + */ + private synchronized void smallWriteOptimizedflushInternal(boolean isClose) + throws IOException { // writeCurrentBufferToService will increment numOfAppendsToServerSinceLastFlush - uploadBlockAsync(getActiveBlock(), true, isClose); + uploadBlockAsync(getBlockManager().getActiveBlock(), + true, isClose); waitForAppendsToComplete(); shrinkWriteOperationQueue(); maybeThrowLastError(); numOfAppendsToServerSinceLastFlush = 0; } + /** + * Asynchronously flushes the buffered data to the Azure Blob Storage service. + * This method checks for any previous errors, uploads the current block if needed, + * waits for appends to complete, and then performs an async flush operation. + * + * @throws IOException if an I/O error occurs during the flush operation. + */ private synchronized void flushInternalAsync() throws IOException { maybeThrowLastError(); + // Upload the current block if there is active block data if (hasActiveBlockDataToUpload()) { uploadCurrentBlock(); } @@ -569,59 +761,25 @@ private synchronized void flushInternalAsync() throws IOException { } /** - * Appending the current active data block to service. Clearing the active - * data block and releasing all buffered data. - * @throws IOException if there is any failure while starting an upload for - * the dataBlock or while closing the BlockUploadData. + * Waits for all write operations (appends) to complete. + * This method iterates through the list of write operations and waits for their tasks + * to finish. If an error occurs during the operation, it is handled appropriately. + * + * @throws IOException if an I/O error occurs while waiting for appends to complete. */ - private void writeAppendBlobCurrentBufferToService() throws IOException { - DataBlocks.DataBlock activeBlock = getActiveBlock(); - // No data, return. 
- if (!hasActiveBlockDataToUpload()) { - return; - } - - final int bytesLength = activeBlock.dataSize(); - DataBlocks.BlockUploadData uploadData = activeBlock.startUpload(); - clearActiveBlock(); - outputStreamStatistics.writeCurrentBuffer(); - outputStreamStatistics.bytesToUpload(bytesLength); - final long offset = position; - position += bytesLength; - AbfsPerfTracker tracker = client.getAbfsPerfTracker(); - try (AbfsPerfInfo perfInfo = new AbfsPerfInfo(tracker, - "writeCurrentBufferToService", "append")) { - AppendRequestParameters reqParams = new AppendRequestParameters(offset, 0, - bytesLength, APPEND_MODE, true, leaseId, isExpectHeaderEnabled); - AbfsRestOperation op = getClient().append(path, uploadData.toByteArray(), - reqParams, cachedSasToken.get(), contextEncryptionAdapter, - new TracingContext(tracingContext)); - cachedSasToken.update(op.getSasToken()); - outputStreamStatistics.uploadSuccessful(bytesLength); - - perfInfo.registerResult(op.getResult()); - perfInfo.registerSuccess(true); - return; - } catch (Exception ex) { - outputStreamStatistics.uploadFailed(bytesLength); - failureWhileSubmit(ex); - } finally { - IOUtils.close(uploadData, activeBlock); - } - } - private synchronized void waitForAppendsToComplete() throws IOException { for (WriteOperation writeOperation : writeOperations) { try { + // Wait for the write operation task to complete writeOperation.task.get(); } catch (Exception ex) { outputStreamStatistics.uploadFailed(writeOperation.length); if (ex.getCause() instanceof AbfsRestOperationException) { - if (((AbfsRestOperationException) ex.getCause()).getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND) { + if (((AbfsRestOperationException) ex.getCause()).getStatusCode() + == HttpURLConnection.HTTP_NOT_FOUND) { throw new FileNotFoundException(ex.getMessage()); } } - if (ex.getCause() instanceof AzureBlobFileSystemException) { ex = (AzureBlobFileSystemException) ex.getCause(); } @@ -631,46 +789,40 @@ private synchronized void waitForAppendsToComplete() throws IOException { } } - private synchronized void flushWrittenBytesToService(boolean isClose) throws IOException { + /** + * Flushes the written bytes to the Azure Blob Storage service, ensuring all + * appends are completed. This method is typically called during a close operation. + * + * @param isClose indicates whether this flush is happening as part of a close operation. + * @throws IOException if an I/O error occurs during the flush operation. + */ + private synchronized void flushWrittenBytesToService(boolean isClose) + throws IOException { + // Ensure all appends are completed before flushing waitForAppendsToComplete(); + // Flush the written bytes to the service flushWrittenBytesToServiceInternal(position, false, isClose); } - private synchronized void flushWrittenBytesToServiceAsync() throws IOException { + /** + * Asynchronously flushes the written bytes to the Azure Blob Storage service. + * This method ensures that the write operation queue is managed and only flushes + * if there are uncommitted data beyond the last flush offset. + * + * @throws IOException if an I/O error occurs during the flush operation. 
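
A simplified, self-contained model of the error unwrapping in waitForAppendsToComplete above; StatusException is a hypothetical stand-in for AbfsRestOperationException, not the real class.

    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.net.HttpURLConnection;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.Future;

    final class AppendWaiter {
      static class StatusException extends IOException {
        final int statusCode;
        StatusException(int statusCode, String msg) {
          super(msg);
          this.statusCode = statusCode;
        }
      }

      // Block on an async append and surface a service-side 404 as
      // FileNotFoundException, as the stream does for each WriteOperation.
      static void waitFor(Future<?> task) throws IOException {
        try {
          task.get();
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          throw new IOException(e);
        } catch (ExecutionException e) {
          if (e.getCause() instanceof StatusException
              && ((StatusException) e.getCause()).statusCode
                  == HttpURLConnection.HTTP_NOT_FOUND) {
            throw new FileNotFoundException(e.getMessage());
          }
          throw new IOException(e.getCause());
        }
      }
    }
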
+ */ + private synchronized void flushWrittenBytesToServiceAsync() + throws IOException { + // Manage the write operation queue to ensure efficient writes shrinkWriteOperationQueue(); + // Only flush if there are uncommitted data beyond the last flush offset if (this.lastTotalAppendOffset > this.lastFlushOffset) { this.flushWrittenBytesToServiceInternal(this.lastTotalAppendOffset, true, - false/*Async flush on close not permitted*/); + false /*Async flush on close not permitted*/); } } - private synchronized void flushWrittenBytesToServiceInternal(final long offset, - final boolean retainUncommitedData, final boolean isClose) throws IOException { - // flush is called for appendblob only on close - if (this.isAppendBlob && !isClose) { - return; - } - - AbfsPerfTracker tracker = client.getAbfsPerfTracker(); - try (AbfsPerfInfo perfInfo = new AbfsPerfInfo(tracker, - "flushWrittenBytesToServiceInternal", "flush")) { - AbfsRestOperation op = getClient().flush(path, offset, retainUncommitedData, - isClose, cachedSasToken.get(), leaseId, contextEncryptionAdapter, - new TracingContext(tracingContext)); - cachedSasToken.update(op.getSasToken()); - perfInfo.registerResult(op.getResult()).registerSuccess(true); - } catch (AzureBlobFileSystemException ex) { - if (ex instanceof AbfsRestOperationException) { - if (((AbfsRestOperationException) ex).getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND) { - throw new FileNotFoundException(ex.getMessage()); - } - } - lastError = new IOException(ex); - throw lastError; - } - this.lastFlushOffset = offset; - } /** * Try to remove the completed write operations from the beginning of write @@ -697,12 +849,76 @@ private synchronized void shrinkWriteOperationQueue() throws IOException { } } - private static class WriteOperation { - private final Future task; - private final long startOffset; - private final long length; + /** + * Flushes the written bytes to the Azure Blob Storage service. + * + * @param offset the offset up to which data needs to be flushed. + * @param retainUncommittedData whether to retain uncommitted data after flush. + * @param isClose whether this flush is happening as part of a close operation. + * @throws IOException if an I/O error occurs. + */ + private synchronized void flushWrittenBytesToServiceInternal(final long offset, + final boolean retainUncommittedData, final boolean isClose) + throws IOException { + + // Flush is called for append blob only on close + if (this.isAppendBlob && !isClose) { + return; + } + + // Tracker to monitor performance metrics + AbfsPerfTracker tracker = client.getAbfsPerfTracker(); + + // Performance information for tracking this method's performance + try (AbfsPerfInfo perfInfo = new AbfsPerfInfo(tracker, + "flushWrittenBytesToServiceInternal", "flush")) { + + AbfsRestOperation op; + + try { + // Attempt to flush data to the remote service. + op = remoteFlush(offset, retainUncommittedData, isClose, leaseId, + tracingContext); + } catch (InvalidIngressServiceException ex) { + // If an invalid ingress service is encountered, switch handler and retry. 
+ switchHandler(); + op = remoteFlush(offset, retainUncommittedData, isClose, leaseId, + tracingContext); + } catch (AzureBlobFileSystemException ex) { + // Handle specific Azure Blob FileSystem exceptions + if (ex instanceof AbfsRestOperationException && + ((AbfsRestOperationException) ex).getStatusCode() + == HttpURLConnection.HTTP_NOT_FOUND) { + throw new FileNotFoundException(ex.getMessage()); + } + // Store the last error and rethrow it + lastError = new IOException(ex); + throw lastError; + } + + if (op != null) { + // Update the cached SAS token if the operation was successful + cachedSasToken.update(op.getSasToken()); + // Register the result and mark the operation as successful + perfInfo.registerResult(op.getResult()).registerSuccess(true); + } + + // Update the last flush offset + this.lastFlushOffset = offset; + } + } + + protected static class WriteOperation { + + protected final Future task; - WriteOperation(final Future task, final long startOffset, final long length) { + protected final long startOffset; + + protected final long length; + + WriteOperation(final Future task, + final long startOffset, + final long length) { Preconditions.checkNotNull(task, "task"); Preconditions.checkArgument(startOffset >= 0, "startOffset"); Preconditions.checkArgument(length >= 0, "length"); @@ -786,18 +1002,150 @@ public String toString() { return sb.toString(); } + /** + * Gets the reference to the file system back. + * + * @return The back reference to the file system. + */ @VisibleForTesting BackReference getFsBackRef() { return fsBackRef; } + /** + * Gets the executor service used for asynchronous operations. + * + * @return The executor service. + */ @VisibleForTesting ListeningExecutorService getExecutorService() { return executorService; } + /** + * Gets the Azure Blob Storage client. + * + * @return The Azure Blob Storage client. + */ @VisibleForTesting AbfsClient getClient() { return client; } + + /** + * Gets the Azure Blob Storage clientHandler. + * + * @return The Azure Blob Storage clientHandler. + */ + public AbfsClientHandler getClientHandler() { + return clientHandler; + } + + /** + * Gets the path associated with this stream. + * + * @return The path of the stream. + */ + public String getPath() { + return this.path; + } + + /** + * Gets the current position in the stream. + * + * @return The current position in the stream. + */ + public long getPosition() { + return position; + } + + /** + * Sets the position in the stream. + * + * @param position The position to set. + */ + public void setPosition(final long position) { + this.position = position; + } + + /** + * Gets the cached SAS token string for authentication. + * + * @return The cached SAS token string. + */ + public String getCachedSasTokenString() { + return cachedSasToken.get(); + } + + /** + * Gets the context encryption adapter. + * + * @return The context encryption adapter. + */ + public ContextEncryptionAdapter getContextEncryptionAdapter() { + return contextEncryptionAdapter; + } + + /** + * Gets the Azure Block Manager associated with this stream. + * + * @return The Azure Block Manager. + */ + public AzureBlockManager getBlockManager() { + return getIngressHandler().getBlockManager(); + } + + /** + * Gets the tracing context for operations. + * + * @return The tracing context. + */ + public TracingContext getTracingContext() { + return tracingContext; + } + + /** + * Checks if the DFS to blob fallback mechanism is enabled. + * + * @return True if the DFS to blob fallback is enabled, otherwise false. 
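
The catch block above and the one in uploadBlockAsync share one discipline; a minimal generic sketch of it follows, with SwitchException as a hypothetical stand-in for InvalidIngressServiceException.

    import java.io.IOException;
    import java.util.concurrent.Callable;

    final class SwitchOnceRetry {
      static class SwitchException extends IOException { }

      // On an ingress-service mismatch, switch handlers exactly once and
      // replay the same operation; a second failure propagates to the caller.
      static <T> T runWithOneSwitch(Callable<T> op, Runnable switchHandler)
          throws Exception {
        try {
          return op.call();
        } catch (SwitchException ex) {
          switchHandler.run(); // flip DFS <-> Blob ingress
          return op.call();
        }
      }
    }
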
+ */ + public boolean isDFSToBlobFallbackEnabled() { + return isDFSToBlobFallbackEnabled; + } + + /** + * Checks if the 'Expect' header is enabled for HTTP requests. + * + * @return True if the 'Expect' header is enabled, otherwise false. + */ + public boolean isExpectHeaderEnabled() { + return isExpectHeaderEnabled; + } + + /** + * Gets the lease ID associated with the stream. + * + * @return The lease ID. + */ + public String getLeaseId() { + return leaseId; + } + + /** + * Gets the cached SAS token object. + * + * @return The cached SAS token object. + */ + public CachedSASToken getCachedSasToken() { + return cachedSasToken; + } + + /** + * Checks if the stream is associated with an append blob. + * + * @return True if the stream is for an append blob, otherwise false. + */ + public boolean isAppendBlob() { + return isAppendBlob; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java index 4763c99c472ae..242bec61eccef 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java @@ -21,6 +21,7 @@ import java.util.concurrent.ExecutorService; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.azurebfs.constants.AbfsServiceType; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.impl.BackReference; @@ -57,8 +58,6 @@ public class AbfsOutputStreamContext extends AbfsStreamContext { private int blockOutputActiveBlocks; - private AbfsClient client; - private long position; private FileSystem.Statistics statistics; @@ -72,6 +71,14 @@ public class AbfsOutputStreamContext extends AbfsStreamContext { /** A BackReference to the FS instance that created this OutputStream. */ private BackReference fsBackRef; + private AbfsServiceType ingressServiceType; + + private boolean isDFSToBlobFallbackEnabled; + + private String eTag; + + private AbfsClientHandler clientHandler; + public AbfsOutputStreamContext(final long sasTokenRenewPeriodForStreamsInSeconds) { super(sasTokenRenewPeriodForStreamsInSeconds); } @@ -127,10 +134,9 @@ public AbfsOutputStreamContext withBlockOutputActiveBlocks( return this; } - - public AbfsOutputStreamContext withClient( - final AbfsClient client) { - this.client = client; + public AbfsOutputStreamContext withClientHandler( + final AbfsClientHandler clientHandler) { + this.clientHandler = clientHandler; return this; } @@ -164,12 +170,30 @@ public AbfsOutputStreamContext withTracingContext( return this; } + public AbfsOutputStreamContext withETag( + final String eTag) { + this.eTag = eTag; + return this; + } + public AbfsOutputStreamContext withAbfsBackRef( final BackReference fsBackRef) { this.fsBackRef = fsBackRef; return this; } + public AbfsOutputStreamContext withIngressServiceType( + final AbfsServiceType serviceType) { + this.ingressServiceType = serviceType; + return this; + } + + public AbfsOutputStreamContext withDFSToBlobFallbackEnabled( + final boolean isDFSToBlobFallbackEnabled) { + this.isDFSToBlobFallbackEnabled = isDFSToBlobFallbackEnabled; + return this; + } + public AbfsOutputStreamContext build() { // Validation of parameters to be done here. 
if (streamStatistics == null) { @@ -261,10 +285,6 @@ public int getBlockOutputActiveBlocks() { return blockOutputActiveBlocks; } - public AbfsClient getClient() { - return client; - } - public FileSystem.Statistics getStatistics() { return statistics; } @@ -288,4 +308,32 @@ public TracingContext getTracingContext() { public BackReference getFsBackRef() { return fsBackRef; } + + public AbfsServiceType getIngressServiceType() { + return ingressServiceType; + } + + public boolean isDFSToBlobFallbackEnabled() { + return isDFSToBlobFallbackEnabled; + } + + public String getETag() { + return eTag; + } + + public AbfsClientHandler getClientHandler() { + return clientHandler; + } + + /** + * Checks if small write is supported based on the current configuration. + * + * @return true if small write is supported, false otherwise. + */ + protected boolean isSmallWriteSupported() { + if (!enableSmallWriteOptimization) { + return false; + } + return !(ingressServiceType == AbfsServiceType.BLOB || isDFSToBlobFallbackEnabled); + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java index fd961ea4331d6..c3550dd07e09b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java @@ -46,6 +46,7 @@ import static org.apache.hadoop.util.Time.now; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_CONTINUE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.PUT_BLOCK_LIST; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.EGRESS_LIMIT_BREACH_ABBREVIATION; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.INGRESS_LIMIT_BREACH_ABBREVIATION; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.TPS_LIMIT_BREACH_ABBREVIATION; @@ -211,7 +212,7 @@ String getSasToken() { * @param requestHeaders The HTTP request headers. * @param buffer For uploads, this is the request entity body. For downloads, * this will hold the response entity body. - * @param bufferOffset An offset into the buffer where the data beings. + * @param bufferOffset An offset into the buffer where the data begins. * @param bufferLength The length of the data in the buffer. * @param sasToken A sasToken for optional re-use by AbfsInputStream/AbfsOutputStream. 
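
For orientation, a hedged sketch of how a caller might wire the reworked builder; only the options this patch adds or renames are shown, the argument values are placeholders, and a real stream would also need the statistics, block factory, and executor options omitted here.

    import org.apache.hadoop.fs.azurebfs.constants.AbfsServiceType;
    import org.apache.hadoop.fs.azurebfs.services.AbfsClientHandler;
    import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStreamContext;
    import org.apache.hadoop.fs.azurebfs.utils.TracingContext;

    final class ContextWiring {
      static AbfsOutputStreamContext sketch(long sasRenewPeriodSeconds,
          AbfsClientHandler clientHandler, String eTagFromCreate,
          TracingContext tracingContext) {
        return new AbfsOutputStreamContext(sasRenewPeriodSeconds)
            .withClientHandler(clientHandler)      // replaces withClient(client)
            .withETag(eTagFromCreate)              // eTag captured at create time
            .withIngressServiceType(AbfsServiceType.DFS)
            .withDFSToBlobFallbackEnabled(true)
            .withTracingContext(tracingContext)
            .build();
      }
    }
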
*/ @@ -378,7 +379,9 @@ private boolean executeHttpOperation(final int retryCount, // HttpUrlConnection requires httpOperation.sendRequest(buffer, bufferOffset, bufferLength); incrementCounter(AbfsStatistic.SEND_REQUESTS, 1); - incrementCounter(AbfsStatistic.BYTES_SENT, bufferLength); + if (!(operationType.name().equals(PUT_BLOCK_LIST))) { + incrementCounter(AbfsStatistic.BYTES_SENT, bufferLength); + } } httpOperation.processResponse(buffer, bufferOffset, bufferLength); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationType.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationType.java index 6b2c554e58cdc..deae5f524a429 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationType.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationType.java @@ -56,5 +56,6 @@ public enum AbfsRestOperationType { GetBlobProperties, SetBlobMetadata, DeleteBlob, - CopyBlob + CopyBlob, + AppendBlock } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureBlobBlockManager.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureBlobBlockManager.java new file mode 100644 index 0000000000000..be43c64948aa5 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureBlobBlockManager.java @@ -0,0 +1,256 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.store.DataBlocks; + +/** + * Manages Azure Blob blocks for append operations. + */ +public class AzureBlobBlockManager extends AzureBlockManager { + + private static final Logger LOG = LoggerFactory.getLogger( + AbfsOutputStream.class); + + /** The map to store blockId and Status **/ + private final LinkedHashMap blockStatusMap + = new LinkedHashMap<>(); + + /** The list of already committed blocks is stored in this list. */ + private List committedBlockEntries = new ArrayList<>(); + + /** The list of all blockId's for putBlockList. 
+   */
+  private final Set<String> blockIdList = new LinkedHashSet<>();
+
+  /** List to validate order. */
+  private final UniqueArrayList<String> orderedBlockList
+      = new UniqueArrayList<>();
+
+  private final Lock lock = new ReentrantLock();
+
+  /**
+   * UniqueArrayList ensures elements are added only once.
+   *
+   * @param <T> the type of elements in this list
+   */
+  public static class UniqueArrayList<T> extends ArrayList<T> {
+
+    @Override
+    public boolean add(T element) {
+      if (!super.contains(element)) {
+        return super.add(element);
+      }
+      return false;
+    }
+  }
+
+  /**
+   * Constructs an AzureBlobBlockManager.
+   *
+   * @param abfsOutputStream the output stream
+   * @param blockFactory the block factory
+   * @param bufferSize the buffer size
+   * @throws AzureBlobFileSystemException if an error occurs
+   */
+  public AzureBlobBlockManager(AbfsOutputStream abfsOutputStream,
+      DataBlocks.BlockFactory blockFactory,
+      int bufferSize)
+      throws AzureBlobFileSystemException {
+    super(abfsOutputStream, blockFactory, bufferSize);
+    if (abfsOutputStream.getPosition() > 0) {
+      this.committedBlockEntries = getBlockList(
+          abfsOutputStream.getTracingContext());
+    }
+    LOG.trace(
+        "Created a new Blob Block Manager for AbfsOutputStream instance {} for path {}",
+        abfsOutputStream.getStreamID(), abfsOutputStream.getPath());
+  }
+
+  /**
+   * Creates a new block.
+   *
+   * @param position the position
+   * @return the created block
+   * @throws IOException if an I/O error occurs
+   */
+  @Override
+  protected synchronized AbfsBlock createBlockInternal(final long position)
+      throws IOException {
+    if (activeBlock == null) {
+      blockCount++;
+      activeBlock = new AbfsBlobBlock(abfsOutputStream, position);
+    }
+    return activeBlock;
+  }
+
+  /**
+   * Returns block IDs which are committed for the blob.
+   *
+   * @param tracingContext Tracing context object.
+   * @return list of committed block IDs.
+   * @throws AzureBlobFileSystemException if an error occurs
+   */
+  private List<String> getBlockList(TracingContext tracingContext)
+      throws AzureBlobFileSystemException {
+    List<String> committedBlockIdList;
+    AbfsBlobClient blobClient = abfsOutputStream.getClientHandler().getBlobClient();
+    final AbfsRestOperation op = blobClient
+        .getBlockList(abfsOutputStream.getPath(), tracingContext);
+    committedBlockIdList = op.getResult().getBlockIdList();
+    return committedBlockIdList;
+  }
+
+  /**
+   * Adds the block entry to the map with status NEW.
+   *
+   * @param block the block to track
+   */
+  protected void trackBlockWithData(AbfsBlock block) {
+    lock.lock();
+    try {
+      blockStatusMap.put(block.getBlockId(), AbfsBlockStatus.NEW);
+      orderedBlockList.add(block.getBlockId());
+    } finally {
+      lock.unlock();
+    }
+  }
+
+  /**
+   * Updates the status of the specified block.
+   *
+   * @param block the block to update
+   * @param status the new status
+   * @throws IOException if an I/O error occurs
+   */
+  protected void updateBlockStatus(AbfsBlock block, AbfsBlockStatus status)
+      throws IOException {
+    String key = block.getBlockId();
+    lock.lock();
+    try {
+      if (!getBlockStatusMap().containsKey(key)) {
+        throw new IOException("Block is missing with blockId " + key
+            + " for offset " + block.getOffset()
+            + " for path " + abfsOutputStream.getPath()
+            + " with streamId " + abfsOutputStream.getStreamID());
+      } else {
+        blockStatusMap.put(key, status);
+      }
+    } finally {
+      lock.unlock();
+    }
+  }
+
+  /**
+   * Prepares the list of blocks to commit.
+ * + * @param offset the offset + * @return the number of blocks to commit + * @throws IOException if an I/O error occurs + */ + protected int prepareListToCommit(long offset) throws IOException { + // Adds all the committed blocks if available to the list of blocks to be added in putBlockList. + blockIdList.addAll(committedBlockEntries); + String failedBlockId; + AbfsBlockStatus success = AbfsBlockStatus.SUCCESS; + + // No network calls needed for empty map. + if (blockStatusMap.isEmpty()) { + return 0; + } + + int mapEntry = 0; + // If any of the entry in the map doesn't have the status of SUCCESS, fail the flush. + for (Map.Entry entry : getBlockStatusMap().entrySet()) { + if (!success.equals(entry.getValue())) { + failedBlockId = entry.getKey(); + LOG.debug( + "A past append for the given offset {} with blockId {} and streamId {}" + + " for the path {} was not successful", offset, failedBlockId, + abfsOutputStream.getStreamID(), abfsOutputStream.getPath()); + throw new IOException( + "A past append was not successful for blockId " + failedBlockId + + " and offset " + offset + " for path" + abfsOutputStream.getPath() + + " with streamId " + + abfsOutputStream.getStreamID()); + } else { + if (!entry.getKey().equals(orderedBlockList.get(mapEntry))) { + LOG.debug( + "The order for the given offset {} with blockId {} and streamId {} " + + " for the path {} was not successful", offset, + entry.getKey(), + abfsOutputStream.getStreamID(), abfsOutputStream.getPath()); + throw new IOException( + "The ordering in map is incorrect for blockId " + entry.getKey() + + " and offset " + offset + " for path " + + abfsOutputStream.getPath() + " with streamId " + + abfsOutputStream.getStreamID()); + } + blockIdList.add(entry.getKey()); + mapEntry++; + } + } + return mapEntry; + } + + /** + * Returns the block status map. + * + * @return the block status map + */ + private LinkedHashMap getBlockStatusMap() { + return blockStatusMap; + } + + /** + * Returns the block ID list. + * + * @return the block ID list + */ + protected Set getBlockIdList() { + return blockIdList; + } + + /** + * Performs cleanup after committing blocks. + */ + protected void postCommitCleanup() { + lock.lock(); + try { + blockStatusMap.clear(); + orderedBlockList.clear(); + } finally { + lock.unlock(); + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureBlobIngressHandler.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureBlobIngressHandler.java new file mode 100644 index 0000000000000..0f75be177ead4 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureBlobIngressHandler.java @@ -0,0 +1,352 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.azurebfs.Abfs; +import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidIngressServiceException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.store.DataBlocks; +import org.apache.hadoop.io.IOUtils; + +/** + * The BlobFsOutputStream for Rest AbfsClient. + */ +public class AzureBlobIngressHandler extends AzureIngressHandler { + + private static final Logger LOG = LoggerFactory.getLogger( + AbfsOutputStream.class); + + private volatile String eTag; + + private final AzureBlobBlockManager blobBlockManager; + + private final AbfsBlobClient blobClient; + + private final AbfsClientHandler clientHandler; + + /** + * Constructs an AzureBlobIngressHandler. + * + * @param abfsOutputStream the AbfsOutputStream. + * @param blockFactory the block factory. + * @param bufferSize the buffer size. + * @param eTag the eTag. + * @throws AzureBlobFileSystemException if an error occurs. + */ + public AzureBlobIngressHandler(AbfsOutputStream abfsOutputStream, + DataBlocks.BlockFactory blockFactory, + int bufferSize, String eTag, AbfsClientHandler clientHandler, AzureBlockManager blockManager) + throws AzureBlobFileSystemException { + super(abfsOutputStream); + this.eTag = eTag; + if (blockManager instanceof AzureBlobBlockManager) { + this.blobBlockManager = (AzureBlobBlockManager) blockManager; + } else { + this.blobBlockManager = new AzureBlobBlockManager(this.abfsOutputStream, + blockFactory, bufferSize); + } + this.clientHandler = clientHandler; + this.blobClient = clientHandler.getBlobClient(); + LOG.trace("Created a new BlobIngress Handler for AbfsOutputStream instance {} for path {}", + abfsOutputStream.getStreamID(), abfsOutputStream.getPath()); + } + + /** + * Buffers data into the specified block. + * + * @param block the block to buffer data into. + * @param data the data to be buffered. + * @param off the start offset in the data. + * @param length the number of bytes to buffer. + * @return the number of bytes buffered. + * @throws IOException if an I/O error occurs. + */ + @Override + protected int bufferData(AbfsBlock block, + final byte[] data, + final int off, + final int length) + throws IOException { + blobBlockManager.trackBlockWithData(block); + LOG.trace("Buffering data of length {} to block at offset {}", length, off); + return block.write(data, off, length); + } + + /** + * Performs a remote write operation. + * + * @param blockToUpload the block to upload. + * @param uploadData the data to upload. + * @param reqParams the request parameters. + * @param tracingContext the tracing context. + * @return the resulting AbfsRestOperation. 
+ * @throws IOException if an I/O error occurs. + */ + @Override + protected AbfsRestOperation remoteWrite(AbfsBlock blockToUpload, + DataBlocks.BlockUploadData uploadData, + AppendRequestParameters reqParams, + TracingContext tracingContext) + throws IOException { + reqParams.setBlockId(blockToUpload.getBlockId()); + reqParams.setEtag(getETag()); + AbfsRestOperation op; + TracingContext tracingContextAppend = new TracingContext(tracingContext); + tracingContextAppend.setIngressHandler("BAppend"); + tracingContextAppend.setPosition(String.valueOf(blockToUpload.getOffset())); + try { + LOG.trace("Starting remote write for block with ID {} and offset {}", + blockToUpload.getBlockId(), blockToUpload.getOffset()); + op = getClient().append(abfsOutputStream.getPath(), uploadData.toByteArray(), + reqParams, + abfsOutputStream.getCachedSasTokenString(), + abfsOutputStream.getContextEncryptionAdapter(), + tracingContextAppend); + blobBlockManager.updateBlockStatus(blockToUpload, + AbfsBlockStatus.SUCCESS); + } catch (AbfsRestOperationException ex) { + LOG.error("Error in remote write requiring handler switch for path {}", abfsOutputStream.getPath(), ex); + if (shouldIngressHandlerBeSwitched(ex)) { + throw getIngressHandlerSwitchException(ex); + } + LOG.error("Error in remote write for path {} and offset {}", abfsOutputStream.getPath(), + blockToUpload.getOffset(), ex); + throw ex; + } + return op; + } + + /** + * Flushes data to the remote store. + * + * @param offset the offset to flush. + * @param retainUncommitedData whether to retain uncommitted data. + * @param isClose whether this is a close operation. + * @param leaseId the lease ID. + * @param tracingContext the tracing context. + * @return the resulting AbfsRestOperation. + * @throws IOException if an I/O error occurs. + */ + @Override + protected synchronized AbfsRestOperation remoteFlush(final long offset, + final boolean retainUncommitedData, + final boolean isClose, + final String leaseId, + TracingContext tracingContext) + throws IOException { + AbfsRestOperation op; + if (abfsOutputStream.isAppendBlob()) { + return null; + } + if (blobBlockManager.prepareListToCommit(offset) == 0) { + return null; + } + try { + // Generate the xml with the list of blockId's to generate putBlockList call. + String blockListXml = generateBlockListXml( + blobBlockManager.getBlockIdList()); + TracingContext tracingContextFlush = new TracingContext(tracingContext); + tracingContextFlush.setIngressHandler("BFlush"); + tracingContextFlush.setPosition(String.valueOf(offset)); + LOG.trace("Flushing data at offset {} for path {}", offset, abfsOutputStream.getPath()); + op = getClient().flush(blockListXml.getBytes(StandardCharsets.UTF_8), + abfsOutputStream.getPath(), + isClose, abfsOutputStream.getCachedSasTokenString(), leaseId, + getETag(), abfsOutputStream.getContextEncryptionAdapter(), tracingContextFlush); + synchronized (this) { + setETag(op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG)); + } + blobBlockManager.postCommitCleanup(); + } catch (AbfsRestOperationException ex) { + LOG.error("Error in remote flush requiring handler switch for path {}", abfsOutputStream.getPath(), ex); + if (shouldIngressHandlerBeSwitched(ex)) { + throw getIngressHandlerSwitchException(ex); + } + LOG.error("Error in remote flush for path {} and offset {}", abfsOutputStream.getPath(), offset, ex); + throw ex; + } + return op; + } + + /** + * Method to perform a remote write operation for appending data to an append blob in Azure Blob Storage. + * + *
+   * <p>
+   * This method is intended to be implemented by subclasses to handle the specific
+   * case of appending data to an append blob. It takes in the path of the append blob,
+   * the data to be uploaded, the block of data, and additional parameters required for
+   * the append operation.
+   * </p>
+   *
+   * @param path The path of the append blob to which data is to be appended.
+   * @param uploadData The data to be uploaded as part of the append operation.
+   * @param block The block of data to append.
+   * @param reqParams The additional parameters required for the append operation.
+   * @param tracingContext The tracing context for the operation.
+   * @return An {@link AbfsRestOperation} object representing the remote write operation.
+   * @throws IOException If an I/O error occurs during the append operation.
+   */
+  protected AbfsRestOperation remoteAppendBlobWrite(String path,
+      DataBlocks.BlockUploadData uploadData,
+      AbfsBlock block,
+      AppendRequestParameters reqParams,
+      TracingContext tracingContext) throws IOException {
+    // Perform the remote append operation using the blob client.
+    AbfsRestOperation op;
+    try {
+      op = blobClient.appendBlock(path, reqParams, uploadData.toByteArray(), tracingContext);
+    } catch (AbfsRestOperationException ex) {
+      LOG.error("Error in remote write requiring handler switch for path {}",
+          abfsOutputStream.getPath(), ex);
+      if (shouldIngressHandlerBeSwitched(ex)) {
+        throw getIngressHandlerSwitchException(ex);
+      }
+      LOG.error("Error in remote write for path {} and offset {}",
+          abfsOutputStream.getPath(),
+          block.getOffset(), ex);
+      throw ex;
+    }
+    return op;
+  }
+
+  /**
+   * Sets the eTag of the blob.
+   *
+   * @param eTag the eTag to set.
+   */
+  void setETag(String eTag) {
+    this.eTag = eTag;
+  }
+
+  /**
+   * Gets the eTag value of the blob.
+   *
+   * @return the eTag.
+   */
+  @VisibleForTesting
+  @Override
+  public String getETag() {
+    return eTag;
+  }
+
+  /**
+   * Writes the current buffer to the service.
+   */
+  @Override
+  protected void writeAppendBlobCurrentBufferToService() throws IOException {
+    AbfsBlock activeBlock = blobBlockManager.getActiveBlock();
+
+    // No data, return immediately.
+    if (!abfsOutputStream.hasActiveBlockDataToUpload()) {
+      return;
+    }
+
+    // Prepare data for upload.
+    final int bytesLength = activeBlock.dataSize();
+    DataBlocks.BlockUploadData uploadData = activeBlock.startUpload();
+
+    // Clear active block and update statistics.
+    blobBlockManager.clearActiveBlock();
+    abfsOutputStream.getOutputStreamStatistics().writeCurrentBuffer();
+    abfsOutputStream.getOutputStreamStatistics().bytesToUpload(bytesLength);
+
+    // Update the stream position.
+    final long offset = abfsOutputStream.getPosition();
+    abfsOutputStream.setPosition(offset + bytesLength);
+
+    // Perform the upload within a performance tracking context.
+    try (AbfsPerfInfo perfInfo = new AbfsPerfInfo(
+        blobClient.getAbfsPerfTracker(),
+        "writeCurrentBufferToService", "append")) {
+      LOG.trace("Writing current buffer to service at offset {} and path {}", offset, abfsOutputStream.getPath());
+      AppendRequestParameters reqParams = new AppendRequestParameters(
+          offset, 0, bytesLength, AppendRequestParameters.Mode.APPEND_MODE,
+          true, abfsOutputStream.getLeaseId(), abfsOutputStream.isExpectHeaderEnabled());
+
+      AbfsRestOperation op;
+      try {
+        op = remoteAppendBlobWrite(abfsOutputStream.getPath(), uploadData,
+            activeBlock, reqParams,
+            new TracingContext(abfsOutputStream.getTracingContext()));
+      } catch (InvalidIngressServiceException ex) {
+        abfsOutputStream.switchHandler();
+        op = abfsOutputStream.getIngressHandler()
+            .remoteAppendBlobWrite(abfsOutputStream.getPath(), uploadData,
+                activeBlock, reqParams,
+                new TracingContext(abfsOutputStream.getTracingContext()));
+      } finally {
+        // Ensure the upload data stream is closed.
+ IOUtils.closeStreams(uploadData, activeBlock); + } + + if (op != null) { + // Update the SAS token and log the successful upload. + abfsOutputStream.getCachedSasToken().update(op.getSasToken()); + abfsOutputStream.getOutputStreamStatistics() + .uploadSuccessful(bytesLength); + + // Register performance information. + perfInfo.registerResult(op.getResult()); + perfInfo.registerSuccess(true); + } + } catch (Exception ex) { + LOG.error("Failed to upload current buffer of length {} and path {}", bytesLength, abfsOutputStream.getPath(), ex); + abfsOutputStream.getOutputStreamStatistics().uploadFailed(bytesLength); + abfsOutputStream.failureWhileSubmit(ex); + } + } + + /** + * Gets the block manager. + * + * @return the block manager. + */ + @Override + public AzureBlockManager getBlockManager() { + return blobBlockManager; + } + + /** + * Gets the blob client. + * + * @return the blob client. + */ + @Override + public AbfsBlobClient getClient() { + return blobClient; + } + + @VisibleForTesting + public AbfsClientHandler getClientHandler() { + return clientHandler; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureBlockManager.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureBlockManager.java new file mode 100644 index 0000000000000..61fab54cf5148 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureBlockManager.java @@ -0,0 +1,143 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.store.DataBlocks; + +/** + * Abstract base class for managing Azure Data Lake Storage (ADLS) blocks. + */ +public abstract class AzureBlockManager { + + private static final Logger LOG = LoggerFactory.getLogger( + AbfsOutputStream.class); + + /** Factory for blocks. */ + protected final DataBlocks.BlockFactory blockFactory; + + /** Current data block. Null means none currently active. */ + protected AbfsBlock activeBlock; + + /** Count of blocks uploaded. */ + protected long blockCount = 0; + + /** The size of a single block. */ + protected final int blockSize; + + protected AbfsOutputStream abfsOutputStream; + + /** + * Constructs an AzureBlockManager. 
+ * + * @param abfsOutputStream the output stream associated with this block manager + * @param blockFactory the factory to create blocks + * @param blockSize the size of each block + */ + protected AzureBlockManager(AbfsOutputStream abfsOutputStream, + DataBlocks.BlockFactory blockFactory, + final int blockSize) { + this.abfsOutputStream = abfsOutputStream; + this.blockFactory = blockFactory; + this.blockSize = blockSize; + } + + /** + * Creates a new block at the given position. + * + * @param position the position in the output stream where the block should be created + * @return the created block + * @throws IOException if an I/O error occurs + */ + protected final synchronized AbfsBlock createBlock(final long position) + throws IOException { + return createBlockInternal(position); + } + + /** + * Internal method to create a new block at the given position. + * + * @param position the position in the output stream where the block should be created. + * @return the created block. + * @throws IOException if an I/O error occurs. + */ + protected abstract AbfsBlock createBlockInternal(final long position) + throws IOException; + + /** + * Gets the active block. + * + * @return the active block + */ + protected synchronized AbfsBlock getActiveBlock() { + return activeBlock; + } + + /** + * Checks if there is an active block. + * + * @return true if there is an active block, false otherwise + */ + protected synchronized boolean hasActiveBlock() { + return activeBlock != null; + } + + /** + * Gets the block factory. + * + * @return the block factory + */ + protected DataBlocks.BlockFactory getBlockFactory() { + return blockFactory; + } + + /** + * Gets the count of blocks uploaded. + * + * @return the block count + */ + protected long getBlockCount() { + return blockCount; + } + + /** + * Gets the block size. + * + * @return the block size + */ + protected int getBlockSize() { + return blockSize; + } + + /** + * Clears the active block. + */ + void clearActiveBlock() { + if (activeBlock != null) { + LOG.debug("Clearing active block"); + } + synchronized (this) { + activeBlock = null; + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureDFSBlockManager.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureDFSBlockManager.java new file mode 100644 index 0000000000000..f13913f93c2fc --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureDFSBlockManager.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.store.DataBlocks; + +/** + * Manages Azure Data Lake Storage (ADLS) blocks for append operations. + */ +public class AzureDFSBlockManager extends AzureBlockManager { + + private static final Logger LOG = LoggerFactory.getLogger( + AbfsOutputStream.class); + + /** + * Constructs an AzureDFSBlockManager. + * + * @param abfsOutputStream the output stream associated with this block manager + * @param blockFactory the factory to create blocks + * @param blockSize the size of each block + */ + public AzureDFSBlockManager(AbfsOutputStream abfsOutputStream, + DataBlocks.BlockFactory blockFactory, + int blockSize) { + super(abfsOutputStream, blockFactory, blockSize); + LOG.trace( + "Created a new DFS Block Manager for AbfsOutputStream instance {} for path {}", + abfsOutputStream.getStreamID(), abfsOutputStream.getPath()); + } + + /** + * Creates a new block at the given position if none exists. + * + * @param position the position in the output stream where the block should be created + * @return the created block + * @throws IOException if an I/O error occurs + */ + @Override + protected synchronized AbfsBlock createBlockInternal(final long position) + throws IOException { + if (activeBlock == null) { + blockCount++; + activeBlock = new AbfsBlock(abfsOutputStream, position); + } + return activeBlock; + } + + /** + * Gets the active block. + * + * @return the active block + */ + @Override + protected synchronized AbfsBlock getActiveBlock() { + return super.getActiveBlock(); + } + + /** + * Checks if there is an active block. + * + * @return true if there is an active block, false otherwise + */ + @Override + protected synchronized boolean hasActiveBlock() { + return super.hasActiveBlock(); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureDFSIngressHandler.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureDFSIngressHandler.java new file mode 100644 index 0000000000000..b7612cb2f5bc5 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureDFSIngressHandler.java @@ -0,0 +1,269 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.store.DataBlocks; +import org.apache.hadoop.io.IOUtils; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; + +/** + * The DFS ingress handler for the Rest AbfsClient. + */ +public class AzureDFSIngressHandler extends AzureIngressHandler { + + private static final Logger LOG = LoggerFactory.getLogger( + AbfsOutputStream.class); + + private AzureDFSBlockManager dfsBlockManager; + + private final AbfsDfsClient dfsClient; + + private String eTag; + + /** + * Constructs an AzureDFSIngressHandler. + * + * @param abfsOutputStream the AbfsOutputStream. + * @param clientHandler the client handler. + */ + public AzureDFSIngressHandler(AbfsOutputStream abfsOutputStream, AbfsClientHandler clientHandler) { + super(abfsOutputStream); + this.dfsClient = clientHandler.getDfsClient(); + } + + /** + * Constructs an AzureDFSIngressHandler with specified parameters. + * + * @param abfsOutputStream the AbfsOutputStream. + * @param blockFactory the block factory. + * @param bufferSize the buffer size. + * @param eTag the eTag of the file. + * @param clientHandler the client handler. + */ + public AzureDFSIngressHandler(AbfsOutputStream abfsOutputStream, + DataBlocks.BlockFactory blockFactory, + int bufferSize, String eTag, AbfsClientHandler clientHandler) { + this(abfsOutputStream, clientHandler); + this.eTag = eTag; + this.dfsBlockManager = new AzureDFSBlockManager(this.abfsOutputStream, + blockFactory, bufferSize); + LOG.trace( + "Created a new DFSIngress Handler for AbfsOutputStream instance {} for path {}", + abfsOutputStream.getStreamID(), abfsOutputStream.getPath()); + } + + /** + * Buffers data into the specified block. + * + * @param block the block to buffer data into. + * @param data the data to be buffered. + * @param off the start offset in the data. + * @param length the number of bytes to buffer. + * @return the number of bytes buffered. + * @throws IOException if an I/O error occurs. + */ + @Override + public synchronized int bufferData(AbfsBlock block, + final byte[] data, + final int off, + final int length) + throws IOException { + LOG.trace("Buffering data of length {} to block at offset {}", length, off); + return block.write(data, off, length); + } + + /** + * Performs a remote write operation. + * + * @param blockToUpload the block to upload. + * @param uploadData the data to upload. + * @param reqParams the request parameters. + * @param tracingContext the tracing context. + * @return the resulting AbfsRestOperation. + * @throws IOException if an I/O error occurs.
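+ * Note: the tracing header is tagged "DAppend" (with the block offset as position) when no ingress handler tag has been set yet.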
+ */ + @Override + protected AbfsRestOperation remoteWrite(AbfsBlock blockToUpload, + DataBlocks.BlockUploadData uploadData, + AppendRequestParameters reqParams, + TracingContext tracingContext) throws IOException { + TracingContext tracingContextAppend = new TracingContext(tracingContext); + if (tracingContextAppend.getIngressHandler().equals(EMPTY_STRING)) { + tracingContextAppend.setIngressHandler("DAppend"); + tracingContextAppend.setPosition( + String.valueOf(blockToUpload.getOffset())); + } + LOG.trace("Starting remote write for block with offset {} and path {}", blockToUpload.getOffset(), abfsOutputStream.getPath()); + return getClient().append(abfsOutputStream.getPath(), + uploadData.toByteArray(), reqParams, + abfsOutputStream.getCachedSasTokenString(), + abfsOutputStream.getContextEncryptionAdapter(), + tracingContextAppend); + } + + /** + * Method to perform a remote write operation for appending data to an append blob in Azure Blob Storage. + * + *
<p>This method is intended to be implemented by subclasses to handle the specific + * case of appending data to an append blob. It takes in the path of the append blob, + * the data to be uploaded, the block of data, and additional parameters required for + * the append operation.</p>
    + * + * @param path The path of the append blob to which data is to be appended. + * @param uploadData The data to be uploaded as part of the append operation. + * @param block The block of data to append. + * @param reqParams The additional parameters required for the append operation. + * @param tracingContext The tracing context for the operation. + * @return An {@link AbfsRestOperation} object representing the remote write operation. + * @throws IOException If an I/O error occurs during the append operation. + */ + @Override + protected AbfsRestOperation remoteAppendBlobWrite(String path, DataBlocks.BlockUploadData uploadData, + AbfsBlock block, AppendRequestParameters reqParams, + TracingContext tracingContext) throws IOException { + return remoteWrite(block, uploadData, reqParams, tracingContext); + } + + /** + * Flushes data to the remote store. + * + * @param offset the offset to flush. + * @param retainUncommitedData whether to retain uncommitted data. + * @param isClose whether this is a close operation. + * @param leaseId the lease ID. + * @param tracingContext the tracing context. + * @return the resulting AbfsRestOperation. + * @throws IOException if an I/O error occurs. + */ + @Override + protected synchronized AbfsRestOperation remoteFlush(final long offset, + final boolean retainUncommitedData, + final boolean isClose, + final String leaseId, + TracingContext tracingContext) + throws IOException { + TracingContext tracingContextFlush = new TracingContext(tracingContext); + if (tracingContextFlush.getIngressHandler().equals(EMPTY_STRING)) { + tracingContextFlush.setIngressHandler("DFlush"); + tracingContextFlush.setPosition(String.valueOf(offset)); + } + LOG.trace("Flushing data at offset {} and path {}", offset, abfsOutputStream.getPath()); + return getClient() + .flush(abfsOutputStream.getPath(), offset, retainUncommitedData, + isClose, + abfsOutputStream.getCachedSasTokenString(), leaseId, + abfsOutputStream.getContextEncryptionAdapter(), + tracingContextFlush); + } + + /** + * Appending the current active data block to the service. Clearing the active + * data block and releasing all buffered data. + * + * @throws IOException if there is any failure while starting an upload for + * the data block or while closing the BlockUploadData. + */ + @Override + protected void writeAppendBlobCurrentBufferToService() throws IOException { + AbfsBlock activeBlock = dfsBlockManager.getActiveBlock(); + + // No data, return immediately. + if (!abfsOutputStream.hasActiveBlockDataToUpload()) { + return; + } + + // Prepare data for upload. + final int bytesLength = activeBlock.dataSize(); + DataBlocks.BlockUploadData uploadData = activeBlock.startUpload(); + + // Clear active block and update statistics. + dfsBlockManager.clearActiveBlock(); + abfsOutputStream.getOutputStreamStatistics().writeCurrentBuffer(); + abfsOutputStream.getOutputStreamStatistics().bytesToUpload(bytesLength); + + // Update the stream position. + final long offset = abfsOutputStream.getPosition(); + abfsOutputStream.setPosition(offset + bytesLength); + + // Perform the upload within a performance tracking context. 
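+ // (AbfsPerfInfo is try-with-resources scoped; registerResult/registerSuccess below feed the client's performance tracker.)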
+ try (AbfsPerfInfo perfInfo = new AbfsPerfInfo( + dfsClient.getAbfsPerfTracker(), + "writeCurrentBufferToService", "append")) { + LOG.trace("Writing current buffer to service at offset {} and path {}", offset, abfsOutputStream.getPath()); + AppendRequestParameters reqParams = new AppendRequestParameters( + offset, 0, bytesLength, AppendRequestParameters.Mode.APPEND_MODE, + true, abfsOutputStream.getLeaseId(), abfsOutputStream.isExpectHeaderEnabled()); + + // Perform the remote write operation. + AbfsRestOperation op = remoteWrite(activeBlock, uploadData, reqParams, + new TracingContext(abfsOutputStream.getTracingContext())); + + // Update the SAS token and log the successful upload. + abfsOutputStream.getCachedSasToken().update(op.getSasToken()); + abfsOutputStream.getOutputStreamStatistics().uploadSuccessful(bytesLength); + + // Register performance information. + perfInfo.registerResult(op.getResult()); + perfInfo.registerSuccess(true); + } catch (Exception ex) { + LOG.error("Failed to upload current buffer of length {} and path {}", bytesLength, abfsOutputStream.getPath(), ex); + abfsOutputStream.getOutputStreamStatistics().uploadFailed(bytesLength); + abfsOutputStream.failureWhileSubmit(ex); + } finally { + // Ensure the upload data stream is closed. + IOUtils.closeStreams(uploadData, activeBlock); + } + } + + + /** + * Gets the block manager. + * + * @return the block manager. + */ + @Override + public AzureBlockManager getBlockManager() { + return dfsBlockManager; + } + + /** + * Gets the dfs client. + * + * @return the dfs client. + */ + @Override + public AbfsDfsClient getClient() { + return dfsClient; + } + + /** + * Gets the eTag value of the blob. + * + * @return the eTag. + */ + @Override + public String getETag() { + return eTag; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureDfsToBlobIngressFallbackHandler.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureDfsToBlobIngressFallbackHandler.java new file mode 100644 index 0000000000000..e603228cbeea3 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureDfsToBlobIngressFallbackHandler.java @@ -0,0 +1,263 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidIngressServiceException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.store.DataBlocks; +import org.apache.hadoop.io.IOUtils; + +/** + * Handles the fallback mechanism for Azure Blob Ingress operations. + */ +public class AzureDfsToBlobIngressFallbackHandler extends AzureDFSIngressHandler { + + private static final Logger LOG = LoggerFactory.getLogger( + AbfsOutputStream.class); + + private final AzureBlobBlockManager blobBlockManager; + + private String eTag; + + private final Lock lock = new ReentrantLock(); + + /** + * Constructs an AzureDfsToBlobIngressFallbackHandler. + * + * @param abfsOutputStream the AbfsOutputStream. + * @param blockFactory the block factory. + * @param bufferSize the buffer size. + * @param eTag the eTag. + * @param clientHandler the client handler. + * @throws AzureBlobFileSystemException if an error occurs. + */ + public AzureDfsToBlobIngressFallbackHandler(AbfsOutputStream abfsOutputStream, + DataBlocks.BlockFactory blockFactory, + int bufferSize, String eTag, AbfsClientHandler clientHandler) throws AzureBlobFileSystemException { + super(abfsOutputStream, clientHandler); + this.eTag = eTag; + this.blobBlockManager = new AzureBlobBlockManager(this.abfsOutputStream, + blockFactory, bufferSize); + LOG.trace( + "Created a new BlobFallbackIngress Handler for AbfsOutputStream instance {} for path {}", + abfsOutputStream.getStreamID(), abfsOutputStream.getPath()); + } + + /** + * Buffers data into the specified block. + * + * @param block the block to buffer data into. + * @param data the data to be buffered. + * @param off the start offset in the data. + * @param length the number of bytes to buffer. + * @return the number of bytes buffered. + * @throws IOException if an I/O error occurs. + */ + @Override + public synchronized int bufferData(AbfsBlock block, + final byte[] data, + final int off, + final int length) throws IOException { + blobBlockManager.trackBlockWithData(block); + LOG.trace("Buffering data of length {} to block at offset {}", length, off); + return super.bufferData(block, data, off, length); + } + + /** + * Performs a remote write operation. + * + * @param blockToUpload the block to upload. + * @param uploadData the data to upload. + * @param reqParams the request parameters. + * @param tracingContext the tracing context. + * @return the resulting AbfsRestOperation. + * @throws IOException if an I/O error occurs.
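+ * Note: a CONFLICT that signals an endpoint mismatch is rethrown as an InvalidIngressServiceException so the output stream can switch its ingress handler.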
+ */ + @Override + protected AbfsRestOperation remoteWrite(AbfsBlock blockToUpload, + DataBlocks.BlockUploadData uploadData, + AppendRequestParameters reqParams, + TracingContext tracingContext) throws IOException { + AbfsRestOperation op; + TracingContext tracingContextAppend = new TracingContext(tracingContext); + tracingContextAppend.setIngressHandler("FBAppend"); + tracingContextAppend.setPosition(String.valueOf(blockToUpload.getOffset())); + try { + op = super.remoteWrite(blockToUpload, uploadData, reqParams, + tracingContextAppend); + blobBlockManager.updateBlockStatus(blockToUpload, + AbfsBlockStatus.SUCCESS); + } catch (AbfsRestOperationException ex) { + if (shouldIngressHandlerBeSwitched(ex)) { + LOG.error("Error in remote write requiring handler switch for path {}", abfsOutputStream.getPath(), ex); + throw getIngressHandlerSwitchException(ex); + } + LOG.error("Error in remote write for path {} and offset {}", abfsOutputStream.getPath(), + blockToUpload.getOffset(), ex); + throw ex; + } + return op; + } + + /** + * Flushes data to the remote store. + * + * @param offset the offset to flush. + * @param retainUncommitedData whether to retain uncommitted data. + * @param isClose whether this is a close operation. + * @param leaseId the lease ID. + * @param tracingContext the tracing context. + * @return the resulting AbfsRestOperation. + * @throws IOException if an I/O error occurs. + */ + @Override + protected synchronized AbfsRestOperation remoteFlush(final long offset, + final boolean retainUncommitedData, + final boolean isClose, + final String leaseId, + TracingContext tracingContext) throws IOException { + AbfsRestOperation op; + if (blobBlockManager.prepareListToCommit(offset) == 0) { + return null; + } + try { + TracingContext tracingContextFlush = new TracingContext(tracingContext); + tracingContextFlush.setIngressHandler("FBFlush"); + tracingContextFlush.setPosition(String.valueOf(offset)); + op = super.remoteFlush(offset, retainUncommitedData, isClose, leaseId, + tracingContextFlush); + blobBlockManager.postCommitCleanup(); + } catch (AbfsRestOperationException ex) { + if (shouldIngressHandlerBeSwitched(ex)) { + LOG.error("Error in remote flush requiring handler switch for path {}", abfsOutputStream.getPath(), ex); + throw getIngressHandlerSwitchException(ex); + } + LOG.error("Error in remote flush for path {} and offset {}", abfsOutputStream.getPath(), offset, ex); + throw ex; + } + return op; + } + + /** + * Gets the block manager. + * + * @return the block manager. + */ + @Override + public AzureBlockManager getBlockManager() { + return blobBlockManager; + } + + /** + * Gets the eTag value of the blob. + * + * @return the eTag. + */ + @VisibleForTesting + public String getETag() { + lock.lock(); + try { + return eTag; + } finally { + lock.unlock(); + } + } + + /** + * Appending the current active data block to the service. Clearing the active + * data block and releasing all buffered data. + * + * @throws IOException if there is any failure while starting an upload for + * the data block or while closing the BlockUploadData. + */ + @Override + protected void writeAppendBlobCurrentBufferToService() throws IOException { + AbfsBlock activeBlock = blobBlockManager.getActiveBlock(); + + // No data, return immediately. + if (!abfsOutputStream.hasActiveBlockDataToUpload()) { + return; + } + + // Prepare data for upload. 
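+ // (startUpload() snapshots the block's buffered bytes into BlockUploadData, after which the active block can be safely cleared.)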
+ final int bytesLength = activeBlock.dataSize(); + DataBlocks.BlockUploadData uploadData = activeBlock.startUpload(); + + // Clear active block and update statistics. + blobBlockManager.clearActiveBlock(); + abfsOutputStream.getOutputStreamStatistics().writeCurrentBuffer(); + abfsOutputStream.getOutputStreamStatistics().bytesToUpload(bytesLength); + + // Update the stream position. + final long offset = abfsOutputStream.getPosition(); + abfsOutputStream.setPosition(offset + bytesLength); + + // Perform the upload within a performance tracking context. + try (AbfsPerfInfo perfInfo = new AbfsPerfInfo( + getClient().getAbfsPerfTracker(), + "writeCurrentBufferToService", "append")) { + LOG.trace("Writing current buffer to service at offset {} and path {}", offset, abfsOutputStream.getPath()); + AppendRequestParameters reqParams = new AppendRequestParameters( + offset, 0, bytesLength, AppendRequestParameters.Mode.APPEND_MODE, + true, abfsOutputStream.getLeaseId(), abfsOutputStream.isExpectHeaderEnabled()); + + // Perform the remote write operation. + AbfsRestOperation op; + try { + op = remoteAppendBlobWrite(abfsOutputStream.getPath(), uploadData, + activeBlock, reqParams, + new TracingContext(abfsOutputStream.getTracingContext())); + } catch (InvalidIngressServiceException ex) { + abfsOutputStream.switchHandler(); + op = abfsOutputStream.getIngressHandler() + .remoteAppendBlobWrite(abfsOutputStream.getPath(), uploadData, + activeBlock, reqParams, + new TracingContext(abfsOutputStream.getTracingContext())); + } finally { + // Ensure the upload data stream is closed. + IOUtils.closeStreams(uploadData, activeBlock); + } + + if (op != null) { + // Update the SAS token and log the successful upload. + abfsOutputStream.getCachedSasToken().update(op.getSasToken()); + abfsOutputStream.getOutputStreamStatistics() + .uploadSuccessful(bytesLength); + + // Register performance information. + perfInfo.registerResult(op.getResult()); + perfInfo.registerSuccess(true); + } + } catch (Exception ex) { + LOG.error("Failed to upload current buffer of length {} and path {}", bytesLength, abfsOutputStream.getPath(), ex); + abfsOutputStream.getOutputStreamStatistics().uploadFailed(bytesLength); + abfsOutputStream.failureWhileSubmit(ex); + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureIngressHandler.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureIngressHandler.java new file mode 100644 index 0000000000000..a388cd8a65d10 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AzureIngressHandler.java @@ -0,0 +1,207 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidIngressServiceException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; +import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.store.DataBlocks; + +import static java.net.HttpURLConnection.HTTP_CONFLICT; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BLOCK_LIST_END_TAG; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BLOCK_LIST_START_TAG; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.LATEST_BLOCK_FORMAT; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.XML_VERSION; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.BLOB_OPERATION_NOT_SUPPORTED; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.INVALID_APPEND_OPERATION; + +/** + * Abstract base class for handling ingress operations for Azure Data Lake Storage (ADLS). + */ +public abstract class AzureIngressHandler { + + private static final Logger LOG = LoggerFactory.getLogger( + AbfsOutputStream.class); + + /** The output stream associated with this handler */ + protected AbfsOutputStream abfsOutputStream; + + /** + * Constructs an AzureIngressHandler. + * + * @param abfsOutputStream the output stream associated with this handler + */ + protected AzureIngressHandler(AbfsOutputStream abfsOutputStream) { + this.abfsOutputStream = abfsOutputStream; + } + + /** + * Gets the eTag value of the blob. + * + * @return the eTag. + */ + public abstract String getETag(); + + /** + * Buffers data into the specified block. + * + * @param block the block to buffer data into + * @param data the data to buffer + * @param off the start offset in the data + * @param length the number of bytes to buffer + * @return the number of bytes buffered + * @throws IOException if an I/O error occurs + */ + protected abstract int bufferData(AbfsBlock block, + final byte[] data, + final int off, + final int length) throws IOException; + + /** + * Performs a remote write operation to upload a block. + * + * @param blockToUpload the block to upload + * @param uploadData the data to upload + * @param reqParams the request parameters for the append operation + * @param tracingContext the tracing context + * @return the result of the REST operation + * @throws IOException if an I/O error occurs + */ + protected abstract AbfsRestOperation remoteWrite(AbfsBlock blockToUpload, + DataBlocks.BlockUploadData uploadData, + AppendRequestParameters reqParams, + TracingContext tracingContext) throws IOException; + + /** + * Performs a remote flush operation. 
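+ * Implementations commit all data uploaded so far, up to the given offset, to the service.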
+ * + * @param offset the offset to flush to + * @param retainUncommittedData whether to retain uncommitted data + * @param isClose whether this is a close operation + * @param leaseId the lease ID + * @param tracingContext the tracing context + * @return the result of the REST operation + * @throws IOException if an I/O error occurs + */ + protected abstract AbfsRestOperation remoteFlush(final long offset, + final boolean retainUncommittedData, + final boolean isClose, + final String leaseId, + TracingContext tracingContext) throws IOException; + + /** + * Writes the current buffer to the service for an append blob. + * + * @throws IOException if an I/O error occurs + */ + protected abstract void writeAppendBlobCurrentBufferToService() + throws IOException; + + /** + * Abstract method to perform a remote write operation for appending data to an append blob in Azure Blob Storage. + * + *
<p>This method is intended to be implemented by subclasses to handle the specific + * case of appending data to an append blob. It takes in the path of the append blob, + * the data to be uploaded, the block of data, and additional parameters required for + * the append operation.</p>
+ * + * @param path The path of the append blob to which data is to be appended. + * @param uploadData The data to be uploaded as part of the append operation. + * @param block The block of data to append. + * @param reqParams The additional parameters required for the append operation. + * @param tracingContext The tracing context for the operation. + * @return An {@link AbfsRestOperation} object representing the remote write operation. + * @throws IOException If an I/O error occurs during the append operation. + */ + protected abstract AbfsRestOperation remoteAppendBlobWrite(String path, + DataBlocks.BlockUploadData uploadData, + AbfsBlock block, + AppendRequestParameters reqParams, + TracingContext tracingContext) throws IOException; + + /** + * Determines if the ingress handler should be switched based on the given exception. + * + * @param ex the exception that occurred + * @return true if the ingress handler should be switched, false otherwise + */ + protected boolean shouldIngressHandlerBeSwitched(AbfsRestOperationException ex) { + return ex.getStatusCode() == HTTP_CONFLICT && (ex.getErrorCode() + .getErrorCode().equals(AzureServiceErrorCode.BLOB_OPERATION_NOT_SUPPORTED.getErrorCode()) || + ex.getErrorMessage().contains(INVALID_APPEND_OPERATION)); + } + + /** + * Constructs an InvalidIngressServiceException that includes the current handler class name in the exception message. + * + * @param e the original AbfsRestOperationException that triggered this exception. + * @return an InvalidIngressServiceException with the status code, error code, original message, and handler class name. + */ + protected InvalidIngressServiceException getIngressHandlerSwitchException( + AbfsRestOperationException e) { + if (e.getMessage().contains(BLOB_OPERATION_NOT_SUPPORTED)) { + return new InvalidIngressServiceException(e.getStatusCode(), + AzureServiceErrorCode.BLOB_OPERATION_NOT_SUPPORTED.getErrorCode(), + BLOB_OPERATION_NOT_SUPPORTED + " " + getClass().getName(), e); + } else { + return new InvalidIngressServiceException(e.getStatusCode(), + AzureServiceErrorCode.INVALID_APPEND_OPERATION.getErrorCode(), + INVALID_APPEND_OPERATION + " " + getClass().getName(), e); + } + } + + /** + * Gets the block manager associated with this handler. + * + * @return the block manager + */ + protected abstract AzureBlockManager getBlockManager(); + + /** + * Gets the client associated with this handler. + * + * @return the client + */ + public abstract AbfsClient getClient(); + + /** + * Generates an XML string representing the block list.
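+ * For one block the generated payload resembles (illustrative block ID): + * {@code <?xml version="1.0" encoding="utf-8"?><BlockList><Latest>QUJDRA==</Latest></BlockList>}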
+ * + * @param blockIds the set of block IDs + * @return the generated XML string + */ + protected static String generateBlockListXml(Set<String> blockIds) { + StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append(XML_VERSION); + stringBuilder.append(BLOCK_LIST_START_TAG); + for (String blockId : blockIds) { + stringBuilder.append(String.format(LATEST_BLOCK_FORMAT, blockId)); + } + stringBuilder.append(BLOCK_LIST_END_TAG); + return stringBuilder.toString(); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/Listener.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/Listener.java index 4c2270a87f100..d976c6f9b6617 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/Listener.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/Listener.java @@ -30,4 +30,6 @@ public interface Listener { void updatePrimaryRequestID(String primaryRequestID); Listener getClone(); void setOperation(FSOperationType operation); + void updateIngressHandler(String ingressHandler); + void updatePosition(String position); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java index b0a9a021c5e47..853901faec715 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java @@ -63,6 +63,8 @@ public class TracingContext { private Listener listener = null; // null except when testing //final concatenated ID list set into x-ms-client-request-id header private String header = EMPTY_STRING; + private String ingressHandler = EMPTY_STRING; + private String position = EMPTY_STRING; private String metricResults = EMPTY_STRING; private String metricHeader = EMPTY_STRING; @@ -131,6 +133,8 @@ public TracingContext(TracingContext originalTracingContext) { this.retryCount = 0; this.primaryRequestId = originalTracingContext.primaryRequestId; this.format = originalTracingContext.format; + this.position = originalTracingContext.getPosition(); + this.ingressHandler = originalTracingContext.getIngressHandler(); if (originalTracingContext.listener != null) { this.listener = originalTracingContext.listener.getClone(); } @@ -192,7 +196,13 @@ public void constructHeader(AbfsHttpOperation httpOperation, String previousFail + getPrimaryRequestIdForHeader(retryCount > 0) + ":" + streamID + ":" + opType + ":" + retryCount; header = addFailureReasons(header, previousFailure, retryPolicyAbbreviation); - metricHeader += !(metricResults.trim().isEmpty()) ? metricResults : ""; + if (!(ingressHandler.equals(EMPTY_STRING))) { + header += ":" + ingressHandler; + } + if (!(position.equals(EMPTY_STRING))) { + header += ":" + position; + } + metricHeader += !(metricResults.trim().isEmpty()) ? metricResults : ""; break; case TWO_ID_FORMAT: header = clientCorrelationID + ":" + clientRequestId; @@ -256,4 +266,46 @@ public String getHeader() { return header; } + /** + * Gets the ingress handler. + * + * @return the ingress handler as a String. + */ + public String getIngressHandler() { + return ingressHandler; + } + + /** + * Gets the position. + * + * @return the position as a String. + */ + public String getPosition() { + return position; + } + + /** + * Sets the ingress handler.
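+ * The value, when set, is appended to the client-request-id header assembled in constructHeader.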
+ * + * @param ingressHandler the ingress handler to set, must not be null. + */ + public void setIngressHandler(final String ingressHandler) { + this.ingressHandler = ingressHandler; + if (listener != null) { + listener.updateIngressHandler(ingressHandler); + } + } + + /** + * Sets the position. + * + * @param position the position to set, must not be null. + */ + public void setPosition(final String position) { + this.position = position; + if (listener != null) { + listener.updatePosition(position); + } + } + } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java index fb1f1c30d57fa..bf7c345cc7e3b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java @@ -27,12 +27,15 @@ import java.util.Hashtable; import java.util.List; import java.util.Random; +import java.util.Set; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.PathIOException; +import org.apache.hadoop.fs.azurebfs.constants.AbfsServiceType; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.security.EncodingHelper; import org.apache.hadoop.fs.azurebfs.services.AbfsClientUtils; +import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.assertj.core.api.Assertions; import org.assertj.core.api.Assumptions; @@ -61,7 +64,11 @@ import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.util.Lists; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BLOCK_LIST_END_TAG; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BLOCK_LIST_START_TAG; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CPK_IN_NON_HNS_ACCOUNT_ERROR_MESSAGE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.LATEST_BLOCK_FORMAT; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.XML_VERSION; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENCRYPTION_CONTEXT_PROVIDER_TYPE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENCRYPTION_ENCODED_CLIENT_PROVIDED_KEY; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENCRYPTION_ENCODED_CLIENT_PROVIDED_KEY_SHA; @@ -185,6 +192,12 @@ public void testCustomEncryptionCombinations() throws Exception { } } + protected static String generateBlockListXml() { + return XML_VERSION + + BLOCK_LIST_START_TAG + + BLOCK_LIST_END_TAG; + } + private void validateCpkResponseHeadersForCombination(final AzureBlobFileSystem fs) throws Exception { Path testPath = path("/testFile"); @@ -244,6 +257,7 @@ private AbfsRestOperation callOperation(AzureBlobFileSystem fs, Path testPath, EncryptionContextProvider ecp) throws Exception { AbfsClient client = fs.getAbfsClient(); + AbfsClient ingressClient = fs.getAbfsStore().getClientHandler().getIngressClient(); AbfsClientUtils.setEncryptionContextProvider(client, ecp); if (isExceptionCase) { LambdaTestUtils.intercept(IOException.class, () -> { @@ -310,12 +324,26 @@ private AbfsRestOperation callOperation(AzureBlobFileSystem fs, } } case WRITE: - return client.flush(path, 3, false, false, null, - null, encryptionAdapter, 
getTestTracingContext(fs, false)); + if (ingressClient instanceof AbfsDfsClient) { + return ingressClient.flush(path, 3, false, false, null, + null, encryptionAdapter, getTestTracingContext(fs, false)); + } else { + return ingressClient.flush(generateBlockListXml().getBytes( + StandardCharsets.UTF_8), path, false, null, + null, null, encryptionAdapter, getTestTracingContext(fs, false)); + } case APPEND: - return client.append(path, "val".getBytes(), - new AppendRequestParameters(3, 0, 3, APPEND_MODE, false, null, true), - null, encryptionAdapter, getTestTracingContext(fs, false)); + if (ingressClient instanceof AbfsDfsClient) { + return ingressClient.append(path, "val".getBytes(), + new AppendRequestParameters(3, 0, 3, APPEND_MODE, false, null, + true), + null, encryptionAdapter, getTestTracingContext(fs, false)); + } else { + return ingressClient.append(path, "val".getBytes(), + new AppendRequestParameters(3, 0, 3, APPEND_MODE, false, null, + true,"MF8tNDE1MjkzOTE4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", null), + null, encryptionAdapter, getTestTracingContext(fs, false)); + } case SET_ACL: return client.setAcl(path, AclEntry.aclSpecToString( Lists.newArrayList(aclEntry(ACCESS, USER, ALL))), diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsNetworkStatistics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsNetworkStatistics.java index 66b8da89572a1..ccce71df6e48e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsNetworkStatistics.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsNetworkStatistics.java @@ -28,6 +28,9 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.azurebfs.constants.AbfsServiceType; +import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; @@ -65,6 +68,7 @@ public void testAbfsHttpSendStatistics() throws IOException { long expectedConnectionsMade = metricMap.get(CONNECTIONS_MADE.getStatName()); long expectedRequestsSent = metricMap.get(SEND_REQUESTS.getStatName()); long expectedBytesSent = 0; + AbfsClient client = fs.getAbfsStore().getClientHandler().getIngressClient(); // -------------------------------------------------------------------- // Operation: Creating AbfsOutputStream @@ -72,8 +76,13 @@ public void testAbfsHttpSendStatistics() throws IOException { sendRequestPath)) { // Network stats calculation: For Creating AbfsOutputStream: // 1 create request = 1 connection made and 1 send request - expectedConnectionsMade++; - expectedRequestsSent++; + if (client instanceof AbfsBlobClient) { + expectedRequestsSent += 2; + expectedConnectionsMade += 4; + } else { + expectedRequestsSent ++; + expectedConnectionsMade++; + } // -------------------------------------------------------------------- // Operation: Write small data @@ -147,8 +156,13 @@ public void testAbfsHttpSendStatistics() throws IOException { * = 3 connections and 2 send requests */ if (this.getConfiguration().isConditionalCreateOverwriteEnabled()) { - expectedConnectionsMade += 3; - expectedRequestsSent += 2; + if (client instanceof AbfsBlobClient) { + expectedRequestsSent += 2; + 
expectedConnectionsMade += 6; + } else { + expectedConnectionsMade += 3; + expectedRequestsSent += 2; + } } else { expectedConnectionsMade += 1; expectedRequestsSent += 1; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadFooterMetrics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadFooterMetrics.java index 0071b90771c49..90d769b56f4b9 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadFooterMetrics.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadFooterMetrics.java @@ -21,7 +21,10 @@ import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_INFO; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_READ_BUFFER_SIZE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_WRITE_BUFFER_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_KEY; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_NAME; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_URI; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MIN_BUFFER_SIZE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_KB; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB; @@ -30,6 +33,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; + +import org.junit.Assume; import org.junit.Test; import java.io.IOException; @@ -47,6 +52,20 @@ public class ITestAbfsReadFooterMetrics extends AbstractAbfsScaleTest { public ITestAbfsReadFooterMetrics() throws Exception { + checkPrerequisites(); + } + + private void checkPrerequisites(){ + checkIfConfigIsSet(FS_AZURE_METRIC_ACCOUNT_NAME); + checkIfConfigIsSet(FS_AZURE_METRIC_ACCOUNT_KEY); + checkIfConfigIsSet(FS_AZURE_METRIC_URI); + } + + private void checkIfConfigIsSet(String configKey){ + AbfsConfiguration conf = getConfiguration(); + String value = conf.get(configKey); + Assume.assumeTrue(configKey + " config is mandatory for the test to run", + value != null && value.trim().length() > 1); } private static final String TEST_PATH = "/testfile"; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsRestOperationException.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsRestOperationException.java index ca2ab9dabcf43..265792656ae4e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsRestOperationException.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsRestOperationException.java @@ -29,6 +29,9 @@ import org.apache.hadoop.fs.azurebfs.oauth2.RetryTestTokenProvider; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DOT; import static 
org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION; @@ -53,6 +56,7 @@ public void testAbfsRestOperationExceptionFormat() throws IOException { final AzureBlobFileSystem fs = getFileSystem(); Path nonExistedFilePath1 = new Path("nonExistedPath1"); Path nonExistedFilePath2 = new Path("nonExistedPath2"); + AbfsClient client = fs.getAbfsStore().getClient(); try { fs.getFileStatus(nonExistedFilePath1); } catch (Exception ex) { @@ -61,12 +65,21 @@ public void testAbfsRestOperationExceptionFormat() throws IOException { // Expected Fields are: Message, StatusCode, Method, URL, ActivityId(rId) Assertions.assertThat(errorFields) - .describedAs("Number of Fields in exception message are not as expected") + .describedAs( + "Number of Fields in exception message are not as expected") .hasSize(5); // Check status message, status code, HTTP Request Type and URL. - Assertions.assertThat(errorFields[0].trim()) - .describedAs("Error Message Field in exception message is wrong") - .isEqualTo("Operation failed: \"The specified path does not exist.\""); + if (client instanceof AbfsBlobClient) { + Assertions.assertThat(errorFields[0].trim()) + .describedAs("Error Message Field in exception message is wrong") + .contains( + "Operation failed: \"The specified blob does not exist.\""); + } else { + Assertions.assertThat(errorFields[0].trim()) + .describedAs("Error Message Field in exception message is wrong") + .isEqualTo( + "Operation failed: \"The specified path does not exist.\""); + } Assertions.assertThat(errorFields[1].trim()) .describedAs("Status Code Field in exception message " + "should be \"404\"") @@ -88,46 +101,86 @@ public void testAbfsRestOperationExceptionFormat() throws IOException { try { fs.listFiles(nonExistedFilePath2, false); } catch (Exception ex) { - // verify its format String errorMessage = ex.getLocalizedMessage(); String[] errorFields = errorMessage.split(","); - // Expected Fields are: Message, StatusCode, Method, URL, ActivityId(rId), StorageErrorCode, StorageErrorMessage. - Assertions.assertThat(errorFields) - .describedAs("Number of Fields in exception message are not as expected") - .hasSize(7); - // Check status message, status code, HTTP Request Type and URL. - Assertions.assertThat(errorFields[0].trim()) - .describedAs("Error Message Field in exception message is wrong") - .isEqualTo("Operation failed: \"The specified path does not exist.\""); - Assertions.assertThat(errorFields[1].trim()) - .describedAs("Status Code Field in exception message" - + " should be \"404\"") - .isEqualTo("404"); - Assertions.assertThat(errorFields[2].trim()) - .describedAs("Http Rest Method Field in exception message" - + " should be \"GET\"") - .isEqualTo("GET"); - Assertions.assertThat(errorFields[3].trim()) - .describedAs("Url Field in exception message" - + " should start with \"http\"") - .startsWith("http"); - Assertions.assertThat(errorFields[4].trim()) - .describedAs("ActivityId Field in exception message" - + " should start with \"rId:\"") - .startsWith("rId:"); - // Check storage error code and storage error message. 
- Assertions.assertThat(errorFields[5].trim()) - .describedAs("StorageErrorCode Field in exception message" - + " should be \"PathNotFound\"") - .isEqualTo("PathNotFound"); - Assertions.assertThat(errorFields[6].trim()) - .describedAs("StorageErrorMessage Field in exception message" - + " should contain \"RequestId\"") - .contains("RequestId"); - Assertions.assertThat(errorFields[6].trim()) - .describedAs("StorageErrorMessage Field in exception message" - + " should contain \"Time\"") - .contains("Time"); + if (client instanceof AbfsDfsClient) { + // verify its format + // Expected Fields are: Message, StatusCode, Method, URL, ActivityId(rId), StorageErrorCode, StorageErrorMessage. + Assertions.assertThat(errorFields) + .describedAs( + "Number of Fields in exception message are not as expected") + .hasSize(7); + Assertions.assertThat(errorFields[0].trim()) + .describedAs("Error Message Field in exception message is wrong") + .isEqualTo( + "Operation failed: \"The specified path does not exist.\""); + Assertions.assertThat(errorFields[1].trim()) + .describedAs("Status Code Field in exception message" + + " should be \"404\"") + .isEqualTo("404"); + Assertions.assertThat(errorFields[2].trim()) + .describedAs("Http Rest Method Field in exception message" + + " should be \"GET\"") + .isEqualTo("GET"); + Assertions.assertThat(errorFields[3].trim()) + .describedAs("Url Field in exception message" + + " should start with \"http\"") + .startsWith("http"); + Assertions.assertThat(errorFields[4].trim()) + .describedAs("ActivityId Field in exception message" + + " should start with \"rId:\"") + .startsWith("rId:"); + // Check storage error code and storage error message. + Assertions.assertThat(errorFields[5].trim()) + .describedAs("StorageErrorCode Field in exception message" + + " should be \"PathNotFound\"") + .isEqualTo("PathNotFound"); + Assertions.assertThat(errorFields[6].trim()) + .describedAs("StorageErrorMessage Field in exception message" + + " should contain \"RequestId\"") + .contains("RequestId"); + Assertions.assertThat(errorFields[6].trim()) + .describedAs("StorageErrorMessage Field in exception message" + + " should contain \"Time\"") + .contains("Time"); + } + else { + // Expected Fields are: Message, StatusCode, Method, URL, ActivityId(rId) + Assertions.assertThat(errorFields) + .describedAs( + "Number of Fields in exception message are not as expected") + .hasSize(5); + // Check status message, status code, HTTP Request Type and URL. 
+ if (getAbfsStore(fs).getAbfsConfiguration().enableAbfsListIterator()) { + Assertions.assertThat(errorFields[0].trim()) + .describedAs( + "Error Message Field in exception message is wrong") + .contains( + "Operation failed: \"The specified container does not exist.\""); + } else { + Assertions.assertThat(errorFields[0].trim()) + .describedAs( + "Error Message Field in exception message is wrong") + .contains( + "Operation failed: \"The specified blob does not exist.\""); + } + Assertions.assertThat(errorFields[1].trim()) + .describedAs("Status Code Field in exception message " + + "should be \"404\"") + .isEqualTo("404"); + Assertions.assertThat(errorFields[2].trim()) + .describedAs("Http Rest Method Field in exception message " + + "should be \"HEAD\"") + .isEqualTo("HEAD"); + Assertions.assertThat(errorFields[3].trim()) + .describedAs("Url Field in exception message" + + " should start with \"http\"") + .startsWith("http"); + Assertions.assertThat(errorFields[4].trim()) + .describedAs("ActivityId Field in exception message " + + "should start with \"rId:\"") + .startsWith("rId:"); + } } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAppend.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAppend.java index 7d182f936b7bb..9e415deb9cb3e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAppend.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAppend.java @@ -21,30 +21,60 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.OutputStream; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.Arrays; import java.util.HashSet; +import java.util.List; import java.util.Random; import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import org.assertj.core.api.Assertions; +import org.junit.Assert; +import org.junit.Assume; import org.junit.Test; import org.mockito.Mockito; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.constants.AbfsServiceType; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; +import org.apache.hadoop.fs.azurebfs.services.AzureBlobIngressHandler; +import org.apache.hadoop.fs.azurebfs.services.AzureDFSIngressHandler; +import org.apache.hadoop.fs.azurebfs.services.AzureIngressHandler; import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.store.BlockUploadStatistics; import org.apache.hadoop.fs.store.DataBlocks; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.DATA_BLOCKS_BUFFER; +import static 
org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_CONDITIONAL_CREATE_OVERWRITE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_DFSTOBLOB_FALLBACK; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_INFINITE_LEASE_KEY; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_INGRESS_SERVICE_TYPE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_LEASE_THREADS; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_TEST_APPENDBLOB_ENABLED; import static org.apache.hadoop.fs.store.DataBlocks.DATA_BLOCKS_BUFFER_ARRAY; import static org.apache.hadoop.fs.store.DataBlocks.DATA_BLOCKS_BUFFER_DISK; import static org.apache.hadoop.fs.store.DataBlocks.DATA_BLOCKS_BYTEBUFFER; import static org.apache.hadoop.fs.store.DataBlocks.DataBlock.DestState.Closed; import static org.apache.hadoop.fs.store.DataBlocks.DataBlock.DestState.Writing; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.mockito.ArgumentMatchers.anyString; /** * Test append operations. @@ -149,4 +179,541 @@ public void testCloseOfDataBlockOnAppendComplete() throws Exception { } } } + + /** + * Creates a file over DFS and attempts to append over Blob. + * It should fallback to DFS when appending to the file fails. + * + * @throws IOException if an I/O error occurs. + */ + @Test + public void testCreateOverDfsAppendOverBlob() throws IOException { + Assertions.assertThat( + getConfiguration().getBoolean(FS_AZURE_TEST_APPENDBLOB_ENABLED, + false)) + .describedAs("Test should run when blob is not append blob") + .isFalse(); + final AzureBlobFileSystem fs = getFileSystem(); + Path TEST_FILE_PATH = new Path("testFile"); + AzureBlobFileSystemStore.Permissions permissions + = new AzureBlobFileSystemStore.Permissions(false, + FsPermission.getDefault(), FsPermission.getUMask(fs.getConf())); + fs.getAbfsStore().getClientHandler().getDfsClient(). + createPath(makeQualified(TEST_FILE_PATH).toUri().getPath(), true, false, + permissions, false, null, + null, getTestTracingContext(fs, true), getIsNamespaceEnabled(fs)); + fs.getAbfsStore() + .getAbfsConfiguration() + .set(FS_AZURE_INGRESS_SERVICE_TYPE, AbfsServiceType.BLOB.name()); + FSDataOutputStream outputStream = fs.append(TEST_FILE_PATH); + AzureIngressHandler ingressHandler + = ((AbfsOutputStream) outputStream.getWrappedStream()).getIngressHandler(); + AbfsClient client = ingressHandler.getClient(); + Assert.assertTrue("Blob client was not used before fallback", + client instanceof AbfsBlobClient); + outputStream.write(10); + outputStream.hsync(); + outputStream.write(20); + outputStream.hsync(); + outputStream.write(30); + outputStream.hsync(); + AzureIngressHandler ingressHandlerFallback + = ((AbfsOutputStream) outputStream.getWrappedStream()).getIngressHandler(); + AbfsClient clientFallback = ingressHandlerFallback.getClient(); + Assert.assertTrue("DFS client was not used after fallback", + clientFallback instanceof AbfsDfsClient); + } + + /** + * Creates a file over Blob and attempts to append over DFS. + * It should fallback to Blob when appending to the file fails. + * + * @throws IOException if an I/O error occurs. 
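+ * Note: the dfs-to-blob fallback flag is enabled on a fresh filesystem instance before the file is created over the Blob endpoint.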
+ */ + @Test + public void testCreateOverBlobAppendOverDfs() throws IOException { + Assertions.assertThat( + getConfiguration().getBoolean(FS_AZURE_TEST_APPENDBLOB_ENABLED, + false)) + .describedAs("Test should run when blob is not append blob") + .isFalse(); + Configuration conf = getRawConfiguration(); + conf.setBoolean(FS_AZURE_ENABLE_DFSTOBLOB_FALLBACK, true); + conf.set(FS_AZURE_INGRESS_SERVICE_TYPE, + String.valueOf(AbfsServiceType.DFS)); + final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(conf); + Path TEST_FILE_PATH = new Path("testFile"); + AzureBlobFileSystemStore.Permissions permissions + = new AzureBlobFileSystemStore.Permissions(false, + FsPermission.getDefault(), FsPermission.getUMask(fs.getConf())); + fs.getAbfsStore().getAbfsConfiguration().setBoolean(FS_AZURE_ENABLE_DFSTOBLOB_FALLBACK, true); + fs.getAbfsStore().getAbfsConfiguration().set(FS_AZURE_INGRESS_SERVICE_TYPE, + String.valueOf(AbfsServiceType.DFS)); + fs.getAbfsStore().getClientHandler().getBlobClient(). + createPath(makeQualified(TEST_FILE_PATH).toUri().getPath(), true, false, + permissions, false, null, + null, getTestTracingContext(fs, true), getIsNamespaceEnabled(fs)); + FSDataOutputStream outputStream = fs.append(TEST_FILE_PATH); + outputStream.write(10); + outputStream.hsync(); + outputStream.write(20); + outputStream.hsync(); + outputStream.write(30); + outputStream.hsync(); + } + + /** + * Creates an Append Blob over Blob and attempts to append over DFS. + * It should fallback to Blob when appending to the file fails. + * + * @throws IOException if an I/O error occurs. + */ + @Test + public void testCreateAppendBlobOverBlobEndpointAppendOverDfs() + throws IOException, NoSuchFieldException, IllegalAccessException { + Configuration conf = getRawConfiguration(); + conf.setBoolean(FS_AZURE_ENABLE_DFSTOBLOB_FALLBACK, true); + conf.set(FS_AZURE_INGRESS_SERVICE_TYPE, + String.valueOf(AbfsServiceType.DFS)); + final AzureBlobFileSystem fs = Mockito.spy((AzureBlobFileSystem) FileSystem.newInstance(conf)); + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + Mockito.doReturn(true).when(store).isAppendBlobKey(anyString()); + + // Set abfsStore as our mocked value. + Field privateField = AzureBlobFileSystem.class.getDeclaredField("abfsStore"); + privateField.setAccessible(true); + privateField.set(fs, store); + Path TEST_FILE_PATH = new Path("testFile"); + AzureBlobFileSystemStore.Permissions permissions + = new AzureBlobFileSystemStore.Permissions(false, + FsPermission.getDefault(), FsPermission.getUMask(fs.getConf())); + fs.getAbfsStore().getAbfsConfiguration().setBoolean(FS_AZURE_ENABLE_DFSTOBLOB_FALLBACK, true); + fs.getAbfsStore().getAbfsConfiguration().set(FS_AZURE_INGRESS_SERVICE_TYPE, + String.valueOf(AbfsServiceType.DFS)); + fs.getAbfsStore().getClientHandler().getBlobClient(). + createPath(makeQualified(TEST_FILE_PATH).toUri().getPath(), true, false, + permissions, true, null, + null, getTestTracingContext(fs, true), getIsNamespaceEnabled(fs)); + FSDataOutputStream outputStream = fs.append(TEST_FILE_PATH); + outputStream.write(10); + outputStream.hsync(); + outputStream.write(20); + outputStream.hsync(); + outputStream.write(30); + outputStream.hsync(); + } + + /** + * Creates an append Blob over DFS and attempts to append over Blob. + * It should fallback to DFS when appending to the file fails. + * + * @throws IOException if an I/O error occurs. 
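+   * Skipped on FNS accounts, as append blob creation over the DFS endpoint needs a namespace-enabled account.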
+ */ + @Test + public void testCreateAppendBlobOverDfsEndpointAppendOverBlob() + throws IOException, NoSuchFieldException, IllegalAccessException { + Assume.assumeTrue("FNS does not support append blob creation for DFS endpoint", getIsNamespaceEnabled(getFileSystem())); + final AzureBlobFileSystem fs = Mockito.spy(getFileSystem()); + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + Mockito.doReturn(true).when(store).isAppendBlobKey(anyString()); + + // Set abfsStore as our mocked value. + Field privateField = AzureBlobFileSystem.class.getDeclaredField("abfsStore"); + privateField.setAccessible(true); + privateField.set(fs, store); + Path TEST_FILE_PATH = new Path("testFile"); + AzureBlobFileSystemStore.Permissions permissions + = new AzureBlobFileSystemStore.Permissions(false, + FsPermission.getDefault(), FsPermission.getUMask(fs.getConf())); + fs.getAbfsStore().getClientHandler().getDfsClient(). + createPath(makeQualified(TEST_FILE_PATH).toUri().getPath(), true, false, + permissions, true, null, + null, getTestTracingContext(fs, true), getIsNamespaceEnabled(fs)); + fs.getAbfsStore() + .getAbfsConfiguration() + .set(FS_AZURE_INGRESS_SERVICE_TYPE, AbfsServiceType.BLOB.name()); + FSDataOutputStream outputStream = fs.append(TEST_FILE_PATH); + AzureIngressHandler ingressHandler + = ((AbfsOutputStream) outputStream.getWrappedStream()).getIngressHandler(); + AbfsClient client = ingressHandler.getClient(); + Assert.assertTrue("Blob client was not used before fallback", + client instanceof AbfsBlobClient); + outputStream.write(10); + outputStream.hsync(); + outputStream.write(20); + outputStream.hsync(); + outputStream.write(30); + outputStream.flush(); + AzureIngressHandler ingressHandlerFallback + = ((AbfsOutputStream) outputStream.getWrappedStream()).getIngressHandler(); + AbfsClient clientFallback = ingressHandlerFallback.getClient(); + Assert.assertTrue("DFS client was not used after fallback", + clientFallback instanceof AbfsDfsClient); + } + + + /** + * Tests the correct retrieval of the AzureIngressHandler based on the configured ingress service type. + * + * @throws IOException if an I/O error occurs + */ + @Test + public void testValidateIngressHandler() throws IOException { + Configuration configuration = getRawConfiguration(); + configuration.set(FS_AZURE_INGRESS_SERVICE_TYPE, AbfsServiceType.BLOB.name()); + AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(configuration); + Path TEST_FILE_PATH = new Path("testFile"); + AzureBlobFileSystemStore.Permissions permissions + = new AzureBlobFileSystemStore.Permissions(false, + FsPermission.getDefault(), FsPermission.getUMask(fs.getConf())); + fs.getAbfsStore().getClientHandler().getBlobClient(). + createPath(makeQualified(TEST_FILE_PATH).toUri().getPath(), true, + false, + permissions, false, null, + null, getTestTracingContext(fs, true), getIsNamespaceEnabled(fs)); + FSDataOutputStream outputStream = fs.append(TEST_FILE_PATH); + AzureIngressHandler ingressHandler + = ((AbfsOutputStream) outputStream.getWrappedStream()).getIngressHandler(); + Assert.assertTrue("Ingress handler instance is not correct", + ingressHandler instanceof AzureBlobIngressHandler); + AbfsClient client = ingressHandler.getClient(); + Assert.assertTrue("Blob client was not used correctly", + client instanceof AbfsBlobClient); + + Path TEST_FILE_PATH_1 = new Path("testFile1"); + fs.getAbfsStore().getClientHandler().getBlobClient(). 
+ createPath(makeQualified(TEST_FILE_PATH_1).toUri().getPath(), true, + false, + permissions, false, null, + null, getTestTracingContext(fs, true), getIsNamespaceEnabled(fs)); + fs.getAbfsStore() + .getAbfsConfiguration() + .set(FS_AZURE_INGRESS_SERVICE_TYPE, AbfsServiceType.DFS.name()); + FSDataOutputStream outputStream1 = fs.append(TEST_FILE_PATH_1); + AzureIngressHandler ingressHandler1 + = ((AbfsOutputStream) outputStream1.getWrappedStream()).getIngressHandler(); + Assert.assertTrue("Ingress handler instance is not correct", + ingressHandler1 instanceof AzureDFSIngressHandler); + AbfsClient client1 = ingressHandler1.getClient(); + Assert.assertTrue("DFS client was not used correctly", + client1 instanceof AbfsDfsClient); + } + + @Test(expected = FileNotFoundException.class) + public void testAppendImplicitDirectory() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + final Path folderPath = new Path(TEST_FOLDER_PATH); + fs.mkdirs(folderPath); + fs.append(folderPath.getParent()); + } + + @Test(expected = FileNotFoundException.class) + public void testAppendFileNotExists() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + final Path folderPath = new Path(TEST_FOLDER_PATH); + fs.append(folderPath); + } + + /** + * Create directory over dfs endpoint and append over blob endpoint. + * Should return error as append is not supported for directory. + * **/ + @Test(expected = IOException.class) + public void testCreateExplicitDirectoryOverDfsAppendOverBlob() throws IOException { + final AzureBlobFileSystem fs = getFileSystem(); + final Path folderPath = path(TEST_FOLDER_PATH); + AzureBlobFileSystemStore.Permissions permissions + = new AzureBlobFileSystemStore.Permissions(false, + FsPermission.getDefault(), FsPermission.getUMask(fs.getConf())); + fs.getAbfsStore().getClientHandler().getDfsClient(). + createPath(makeQualified(folderPath).toUri().getPath(), false, false, + permissions, false, null, + null, getTestTracingContext(fs, true), getIsNamespaceEnabled(fs)); + FSDataOutputStream outputStream = fs.append(folderPath); + outputStream.write(10); + outputStream.hsync(); + } + + /** + * Recreate file between append and flush. Etag mismatch happens. + **/ + @Test(expected = IOException.class) + public void testRecreateAppendAndFlush() throws IOException { + final AzureBlobFileSystem fs = getFileSystem(); + final Path filePath = path(TEST_FILE_PATH); + fs.create(filePath); + AbfsClient abfsClient = fs.getAbfsStore().getClientHandler().getIngressClient(); + Assume.assumeTrue("Skipping for DFS client", abfsClient instanceof AbfsBlobClient); + FSDataOutputStream outputStream = fs.append(filePath); + outputStream.write(10); + final AzureBlobFileSystem fs1 = (AzureBlobFileSystem) FileSystem.newInstance(getRawConfiguration()); + FSDataOutputStream outputStream1 = fs1.create(filePath); + outputStream.hsync(); + } + + /** + * Recreate directory between append and flush. Etag mismatch happens. 
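+   * Replacing the file with a directory from a second FileSystem instance invalidates the ETag held by the writer, so the hsync must throw.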
+   **/
+  @Test(expected = IOException.class)
+  public void testRecreateDirectoryAppendAndFlush() throws IOException {
+    final AzureBlobFileSystem fs = getFileSystem();
+    final Path filePath = path(TEST_FILE_PATH);
+    fs.create(filePath);
+    FSDataOutputStream outputStream = fs.append(filePath);
+    outputStream.write(10);
+    final AzureBlobFileSystem fs1 = (AzureBlobFileSystem) FileSystem.newInstance(getRawConfiguration());
+    fs1.mkdirs(filePath);
+    outputStream.hsync();
+  }
+
+  /**
+   * Verify that parallel write with same offset from different output streams will not throw exception.
+   **/
+  @Test
+  public void testParallelWriteSameOffsetDifferentOutputStreams() throws Exception {
+    Configuration configuration = getRawConfiguration();
+    configuration.set(FS_AZURE_ENABLE_CONDITIONAL_CREATE_OVERWRITE, "false");
+    AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(configuration);
+    ExecutorService executorService = Executors.newFixedThreadPool(5);
+    List<Future<?>> futures = new ArrayList<>();
+
+    final byte[] b = new byte[8 * ONE_MB];
+    new Random().nextBytes(b);
+    final Path filePath = path(TEST_FILE_PATH);
+    // Create three output streams
+    FSDataOutputStream out1 = fs.create(filePath);
+    FSDataOutputStream out2 = fs.append(filePath);
+    FSDataOutputStream out3 = fs.append(filePath);
+
+    // Submit tasks to write to each output stream with the same offset
+    futures.add(executorService.submit(() -> {
+      try {
+        out1.write(b, 10, 200);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }));
+
+    futures.add(executorService.submit(() -> {
+      try {
+        out2.write(b, 10, 200);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }));
+
+    futures.add(executorService.submit(() -> {
+      try {
+        out3.write(b, 10, 200);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }));
+
+    int exceptionCaught = 0;
+    for (Future<?> future : futures) {
+      try {
+        future.get(); // wait for the task to complete and surface any exception thrown by the lambda
+      } catch (ExecutionException e) {
+        Throwable cause = e.getCause();
+        if (cause instanceof RuntimeException) {
+          exceptionCaught++;
+        } else {
+          System.err.println("Unexpected exception caught: " + cause);
+        }
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt(); // restore the interrupt status
+      }
+    }
+    assertEquals(0, exceptionCaught);
+  }
+
+  /**
+   * Verify that parallel write for different content length will not throw exception.
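+   * Three streams append 200, 300 and 400 bytes concurrently with conditional create overwrite disabled.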
+   **/
+  @Test
+  public void testParallelWriteDifferentContentLength() throws Exception {
+    Configuration configuration = getRawConfiguration();
+    configuration.set(FS_AZURE_ENABLE_CONDITIONAL_CREATE_OVERWRITE, "false");
+    FileSystem fs = FileSystem.newInstance(configuration);
+    ExecutorService executorService = Executors.newFixedThreadPool(5);
+    List<Future<?>> futures = new ArrayList<>();
+
+    final Path filePath = path(TEST_FILE_PATH);
+    // Create three output streams with different content length
+    FSDataOutputStream out1 = fs.create(filePath);
+    final byte[] b1 = new byte[8 * ONE_MB];
+    new Random().nextBytes(b1);
+
+    FSDataOutputStream out2 = fs.append(filePath);
+    FSDataOutputStream out3 = fs.append(filePath);
+
+    // Submit tasks to write to each output stream
+    futures.add(executorService.submit(() -> {
+      try {
+        out1.write(b1, 10, 200);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }));
+
+    futures.add(executorService.submit(() -> {
+      try {
+        out2.write(b1, 20, 300);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }));
+
+    futures.add(executorService.submit(() -> {
+      try {
+        out3.write(b1, 30, 400);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }));
+
+    int exceptionCaught = 0;
+    for (Future<?> future : futures) {
+      try {
+        future.get(); // wait for the task to complete and surface any exception thrown by the lambda
+      } catch (ExecutionException e) {
+        Throwable cause = e.getCause();
+        if (cause instanceof RuntimeException) {
+          exceptionCaught++;
+        } else {
+          System.err.println("Unexpected exception caught: " + cause);
+        }
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt(); // restore the interrupt status
+      }
+    }
+    assertEquals(0, exceptionCaught);
+  }
+
+  /**
+   * Verify behavior when two output streams to the same file are written and closed in parallel.
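+   * Exactly one of the two closing streams should fail; the data of the stream that flushed successfully must be readable.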
+   **/
+  @Test
+  public void testParallelWriteOutputStreamClose() throws Exception {
+    AzureBlobFileSystem fs = getFileSystem();
+    final Path SECONDARY_FILE_PATH = new Path("secondarytestfile");
+    ExecutorService executorService = Executors.newFixedThreadPool(2);
+    List<Future<?>> futures = new ArrayList<>();
+
+    FSDataOutputStream out1 = fs.create(SECONDARY_FILE_PATH);
+    AbfsClient abfsClient = fs.getAbfsStore().getClientHandler().getIngressClient();
+    Assume.assumeTrue("Skipping for DFS client", abfsClient instanceof AbfsBlobClient);
+    AbfsOutputStream outputStream1 = (AbfsOutputStream) out1.getWrappedStream();
+    String fileETag = outputStream1.getIngressHandler().getETag();
+    final byte[] b1 = new byte[8 * ONE_MB];
+    new Random().nextBytes(b1);
+    final byte[] b2 = new byte[8 * ONE_MB];
+    new Random().nextBytes(b2);
+
+    FSDataOutputStream out2 = fs.append(SECONDARY_FILE_PATH);
+
+    // Submit tasks to write to each output stream
+    futures.add(executorService.submit(() -> {
+      try {
+        out1.write(b1, 0, 200);
+        out1.close();
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }));
+
+    futures.add(executorService.submit(() -> {
+      try {
+        out2.write(b2, 0, 400);
+        out2.close();
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }));
+
+    int exceptionCaught = 0;
+
+    for (Future<?> future : futures) {
+      try {
+        future.get(); // wait for the task to complete and surface any exception thrown by the lambda
+      } catch (ExecutionException e) {
+        Throwable cause = e.getCause();
+        if (cause instanceof RuntimeException) {
+          exceptionCaught++;
+        } else {
+          System.err.println("Unexpected exception caught: " + cause);
+        }
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt(); // restore the interrupt status
+      }
+    }
+
+    assertEquals(1, exceptionCaught);
+    // Validate that the data written in the buffer is the same as what was read
+    final byte[] readBuffer = new byte[8 * ONE_MB];
+    int result;
+    FSDataInputStream inputStream = fs.open(SECONDARY_FILE_PATH);
+    inputStream.seek(0);
+
+    AbfsOutputStream outputStream2 = (AbfsOutputStream) out1.getWrappedStream();
+    String out1Etag = outputStream2.getIngressHandler().getETag();
+
+    AbfsOutputStream outputStream3 = (AbfsOutputStream) out2.getWrappedStream();
+    String out2Etag = outputStream3.getIngressHandler().getETag();
+
+    if (!fileETag.equals(out1Etag)) {
+      result = inputStream.read(readBuffer, 0, 4 * ONE_MB);
+      assertEquals(200, result); // the number of bytes read must match the number of bytes written
+      assertArrayEquals(Arrays.copyOfRange(b1, 0, result),
+          Arrays.copyOfRange(readBuffer, 0, result)); // the data read must match the data written
+    } else if (!fileETag.equals(out2Etag)) {
+      result = inputStream.read(readBuffer, 0, 4 * ONE_MB);
+      assertEquals(400, result); // the number of bytes read must match the number of bytes written
+      assertArrayEquals(Arrays.copyOfRange(b2, 0, result),
+          Arrays.copyOfRange(readBuffer, 0, result)); // the data read must match the data written
+    } else {
+      fail("Neither out1 nor out2 was flushed successfully.");
+    }
+  }
+
+  /**
+   * Verify that the ETag changes once the data is flushed.
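+   * The stream that flushes second holds a stale ETag, so its hsync must fail with an IOException.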
+ **/ + @Test + public void testEtagMismatch() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + final Path filePath = path(TEST_FILE_PATH); + FSDataOutputStream out1 = fs.create(filePath); + FSDataOutputStream out2 = fs.create(filePath); + AbfsClient abfsClient = fs.getAbfsStore().getClientHandler().getIngressClient(); + Assume.assumeTrue("Skipping for DFS client", abfsClient instanceof AbfsBlobClient); + out2.write(10); + out2.hsync(); + out1.write(10); + intercept(IOException.class, () -> out1.hsync()); + } + + @Test + public void testAppendWithLease() throws Exception { + final Path testFilePath = new Path(path(methodName.getMethodName()), TEST_FILE_PATH); + final AzureBlobFileSystem fs = Mockito.spy(getCustomFileSystem(testFilePath.getParent(), 1)); + FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, + FsAction.ALL); + FsPermission umask = new FsPermission(FsAction.NONE, FsAction.NONE, + FsAction.NONE); + AbfsOutputStream outputStream = (AbfsOutputStream) fs.getAbfsStore().createFile(testFilePath, null, true, + permission, umask, getTestTracingContext(fs, true)); + outputStream.write(10); + outputStream.close(); + assertNotNull(outputStream.getLeaseId()); + } + + private AzureBlobFileSystem getCustomFileSystem(Path infiniteLeaseDirs, int numLeaseThreads) throws Exception { + Configuration conf = getRawConfiguration(); + conf.setBoolean(String.format("fs.%s.impl.disable.cache", getAbfsScheme()), true); + conf.set(FS_AZURE_INFINITE_LEASE_KEY, infiniteLeaseDirs.toUri().getPath()); + conf.setInt(FS_AZURE_LEASE_THREADS, numLeaseThreads); + FileSystem fileSystem = FileSystem.newInstance(conf); + return (AzureBlobFileSystem) fileSystem; + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChecksum.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChecksum.java index 9ca0986931831..1b56e062c49b7 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChecksum.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChecksum.java @@ -18,10 +18,13 @@ package org.apache.hadoop.fs.azurebfs; +import java.nio.charset.StandardCharsets; import java.security.SecureRandom; import java.util.Arrays; import java.util.HashSet; +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.fs.azurebfs.constants.AbfsServiceType; import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; import org.assertj.core.api.Assertions; import org.junit.Test; @@ -35,9 +38,11 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; import org.apache.hadoop.fs.impl.OpenFileParameters; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_BUFFERED_PREAD_DISABLE; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.BLOCK_ID_LENGTH; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB; import static org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters.Mode.APPEND_MODE; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -69,16 +74,16 @@ public void testWriteReadWithChecksum() throws Exception { @Test 
  public void testAppendWithChecksumAtDifferentOffsets() throws Exception {
    AzureBlobFileSystem fs = getConfiguredFileSystem(MB_4, MB_4, true);
-    AbfsClient client = fs.getAbfsStore().getClient();
+    AbfsClient client = fs.getAbfsStore().getClientHandler().getIngressClient();
    Path path = path("testPath" + getMethodName());
-    fs.create(path);
+    AbfsOutputStream os = (AbfsOutputStream) fs.create(path).getWrappedStream();
    byte[] data = generateRandomBytes(MB_4);
    int pos = 0;

-    pos += appendWithOffsetHelper(client, path, data, fs, pos, 0);
-    pos += appendWithOffsetHelper(client, path, data, fs, pos, ONE_MB);
-    pos += appendWithOffsetHelper(client, path, data, fs, pos, MB_2);
-    appendWithOffsetHelper(client, path, data, fs, pos, MB_4 - 1);
+    pos += appendWithOffsetHelper(os, client, path, data, fs, pos, 0);
+    pos += appendWithOffsetHelper(os, client, path, data, fs, pos, ONE_MB);
+    pos += appendWithOffsetHelper(os, client, path, data, fs, pos, MB_2);
+    appendWithOffsetHelper(os, client, path, data, fs, pos, MB_4 - 1);
    fs.close();
  }

@@ -107,16 +112,16 @@ public void testWriteReadWithChecksumAndOptions() throws Exception {
  @Test
  public void testAbfsInvalidChecksumExceptionInAppend() throws Exception {
    AzureBlobFileSystem fs = getConfiguredFileSystem(MB_4, MB_4, true);
-    AbfsClient spiedClient = Mockito.spy(fs.getAbfsStore().getClient());
+    AbfsClient spiedClient = Mockito.spy(fs.getAbfsStore().getClientHandler().getIngressClient());
    Path path = path("testPath" + getMethodName());
-    fs.create(path);
+    AbfsOutputStream os = (AbfsOutputStream) fs.create(path).getWrappedStream();
    byte[] data= generateRandomBytes(MB_4);
    String invalidMD5Hash = spiedClient.computeMD5Hash(
        INVALID_MD5_TEXT.getBytes(), 0, INVALID_MD5_TEXT.length());
    Mockito.doReturn(invalidMD5Hash).when(spiedClient).computeMD5Hash(any(), any(Integer.class), any(Integer.class));
    AbfsRestOperationException ex = intercept(AbfsInvalidChecksumException.class, () -> {
-      appendWithOffsetHelper(spiedClient, path, data, fs, 0, 0);
+      appendWithOffsetHelper(os, spiedClient, path, data, fs, 0, 0);
    });

    Assertions.assertThat(ex.getErrorCode())
@@ -163,6 +168,20 @@ private void testWriteReadWithChecksumInternal(final boolean readAheadEnabled)
    }
  }

+  /**
+   * Helper method that generates blockId.
+   * @param os output stream whose stream id seeds the generated block ID.
+   * @param position The offset needed to generate blockId.
+   * @return String representing the block ID generated.
+   */
+  private String generateBlockId(AbfsOutputStream os, long position) {
+    String streamId = os.getStreamID();
+    String streamIdHash = Integer.toString(streamId.hashCode());
+    String blockId = String.format("%d_%s", position, streamIdHash);
+    byte[] blockIdByteArray = new byte[BLOCK_ID_LENGTH];
+    System.arraycopy(blockId.getBytes(StandardCharsets.UTF_8), 0, blockIdByteArray, 0,
+        Math.min(BLOCK_ID_LENGTH, blockId.length()));
+    return new String(Base64.encodeBase64(blockIdByteArray), StandardCharsets.UTF_8);
+  }
+
  /**
   * Verify that the checksum computed on client side matches with the one
   * computed at server side. If not, request will fail with 400 Bad request.
@@ -173,10 +192,12 @@ private void testWriteReadWithChecksumInternal(final boolean readAheadEnabled)
+   * @param os output stream whose stream id and ETag are used for the append
   * @param offset
   * @throws Exception
   */
-  private int appendWithOffsetHelper(AbfsClient client, Path path,
+  private int appendWithOffsetHelper(AbfsOutputStream os, AbfsClient client, Path path,
      byte[] data, AzureBlobFileSystem fs, final int pos, final int offset) throws Exception {
+    String blockId = generateBlockId(os, pos);
+    String eTag = os.getIngressHandler().getETag();
    AppendRequestParameters reqParams = new AppendRequestParameters(
-        pos, offset, data.length - offset, APPEND_MODE, isAppendBlobEnabled(), null, true);
+        pos, offset, data.length - offset, APPEND_MODE, isAppendBlobEnabled(), null, true, blockId, eTag);
    client.append(path.toUri().getPath(), data, reqParams, null, null,
        getTestTracingContext(fs, false));
    return reqParams.getLength();
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java
index d8db901151fe7..80ffdf37a065b 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java
@@ -29,6 +29,7 @@
 import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
 import org.apache.hadoop.fs.azurebfs.contracts.exceptions.SASTokenProviderException;
 import org.apache.hadoop.fs.azurebfs.extensions.MockDelegationSASTokenProvider;
+import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient;
 import org.apache.hadoop.fs.azurebfs.services.AuthType;
 import org.apache.hadoop.fs.azurebfs.services.FixedSASTokenProvider;
 import org.apache.hadoop.fs.azurebfs.utils.AccountSASGenerator;
@@ -91,8 +92,8 @@ public void testBothProviderFixedTokenConfigured() throws Exception {
    AbfsConfiguration testAbfsConfig = new AbfsConfiguration(
        getRawConfiguration(), this.getAccountName());
    removeAnyPresetConfiguration(testAbfsConfig);
-
-    // Configuring a SASTokenProvider class which provides a user delegation SAS.
+    Assume.assumeTrue(getFileSystem().getAbfsStore().getClient() instanceof AbfsDfsClient);
+    // Configuring a SASTokenProvider class which provides a user delegation SAS.
    testAbfsConfig.set(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE,
        MockDelegationSASTokenProvider.class.getName());

@@ -103,7 +104,7 @@ public void testBothProviderFixedTokenConfigured() throws Exception {
    try (AzureBlobFileSystem newTestFs = (AzureBlobFileSystem)
            FileSystem.newInstance(testAbfsConfig.getRawConfiguration())) {

-      // Asserting that MockDelegationSASTokenProvider is used.
+      // Assert that MockDelegationSASTokenProvider is used.
Assertions.assertThat(testAbfsConfig.getSASTokenProvider()) .describedAs("Custom SASTokenProvider Class must be used") .isInstanceOf(MockDelegationSASTokenProvider.class); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java index 5a6d3785fb660..ea75e57557405 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java @@ -22,20 +22,31 @@ import java.io.FilterOutputStream; import java.io.IOException; import java.lang.reflect.Field; +import java.util.ArrayList; import java.util.EnumSet; +import java.util.List; import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import org.assertj.core.api.Assertions; +import org.junit.Assert; import org.junit.Test; +import org.mockito.Mockito; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; +import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsClientHandler; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.test.GenericTestUtils; @@ -56,6 +67,7 @@ import static java.net.HttpURLConnection.HTTP_OK; import static java.net.HttpURLConnection.HTTP_PRECON_FAILED; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_MKDIR_OVERWRITE; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.ArgumentMatchers.nullable; @@ -207,7 +219,7 @@ public void testTryWithResources() throws Throwable { out.write('2'); out.hsync(); fail("Expected a failure"); - } catch (FileNotFoundException fnfe) { + } catch (IOException fnfe) { //appendblob outputStream does not generate suppressed exception on close as it is //single threaded code if (!fs.getAbfsStore().isAppendBlobKey(fs.makeQualified(testPath).toString())) { @@ -232,10 +244,9 @@ public void testTryWithResources() throws Throwable { @Test public void testFilterFSWriteAfterClose() throws Throwable { final AzureBlobFileSystem fs = getFileSystem(); - Path testFolderPath = path(TEST_FOLDER_PATH); - Path testPath = new Path(testFolderPath, TEST_CHILD_FILE); + Path testPath = new Path(TEST_FOLDER_PATH, TEST_CHILD_FILE); FSDataOutputStream out = fs.create(testPath); - intercept(FileNotFoundException.class, + intercept(IOException.class, () -> { try (FilterOutputStream fos = new FilterOutputStream(out)) { fos.write('a'); @@ -243,7 +254,7 @@ public void testFilterFSWriteAfterClose() throws Throwable { out.hsync(); fs.delete(testPath, false); // trigger the first failure - throw intercept(FileNotFoundException.class, + throw intercept(IOException.class, () -> { fos.write('b'); out.hsync(); @@ -276,6 +287,8 @@ public void 
testCreateFileOverwrite(boolean enableConditionalCreateOverwrite)
    Configuration config = new Configuration(this.getRawConfiguration());
    config.set("fs.azure.enable.conditional.create.overwrite",
        Boolean.toString(enableConditionalCreateOverwrite));
+    AzureBlobFileSystemStore store = currentFs.getAbfsStore();
+    AbfsClient client = store.getClientHandler().getIngressClient();

    final AzureBlobFileSystem fs =
        (AzureBlobFileSystem) FileSystem.newInstance(currentFs.getUri(),
@@ -293,13 +306,18 @@ public void testCreateFileOverwrite(boolean enableConditionalCreateOverwrite)
    fs.create(nonOverwriteFile, false);

    // One request to server to create path should be issued
-    createRequestCount++;
+    // Two calls are added:
+    // 1. getFileStatus: one call on the DFS endpoint;
+    //    two on the Blob endpoint (an additional list-blob call)
+    // 2. the actual create call: one
+    createRequestCount += (client instanceof AbfsBlobClient && !getIsNamespaceEnabled(fs) ? 2: 1);

    assertAbfsStatistics(
        CONNECTIONS_MADE,
        totalConnectionMadeBeforeTest + createRequestCount,
        fs.getInstrumentationMap());

+
+    // Case 2: Not Overwrite - File pre-exists
    fs.registerListener(new TracingHeaderValidator(
        fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(),
@@ -309,7 +327,11 @@ public void testCreateFileOverwrite(boolean enableConditionalCreateOverwrite)
    fs.registerListener(null);

    // One request to server to create path should be issued
-    createRequestCount++;
+    // Only a single tryGetFileStatus should happen:
+    // 1. getFileStatus on the DFS endpoint: 1
+    //    getFileStatus on the Blob endpoint: 1 (no additional list-blob call as the file exists)
+
+    createRequestCount += (client instanceof AbfsBlobClient && !getIsNamespaceEnabled(fs) ? 2: 1);

    assertAbfsStatistics(
        CONNECTIONS_MADE,
@@ -323,8 +345,12 @@ public void testCreateFileOverwrite(boolean enableConditionalCreateOverwrite)
    // create should be successful
    fs.create(overwriteFilePath, true);

-    // One request to server to create path should be issued
-    createRequestCount++;
+    // One request to server to create path should be issued
+    // Two calls are added:
+    // 1. getFileStatus: one call on the DFS endpoint;
+    //    two on the Blob endpoint (an additional list-blob call for the non-existing path)
+    // 2. the actual create call: one
+    createRequestCount += (client instanceof AbfsBlobClient && !getIsNamespaceEnabled(fs) ? 2: 1);

    assertAbfsStatistics(
        CONNECTIONS_MADE,
@@ -338,12 +364,15 @@ public void testCreateFileOverwrite(boolean enableConditionalCreateOverwrite)
    fs.create(overwriteFilePath, true);
    fs.registerListener(null);

+    createRequestCount += (client instanceof AbfsBlobClient && !getIsNamespaceEnabled(fs) ? 1: 0);
+
+    // A second actual create call follows; its request count depends on the conditional overwrite flow below.
    if (enableConditionalCreateOverwrite) {
      // Three requests will be sent to server to create path,
      // 1. create without overwrite
      // 2. GetFileStatus to get eTag
      // 3. create with overwrite
-      createRequestCount += 3;
+      createRequestCount += (client instanceof AbfsBlobClient && !getIsNamespaceEnabled(fs) ?
4: 3); } else { createRequestCount++; } @@ -390,8 +419,12 @@ public void testNegativeScenariosForCreateOverwriteDisabled() = ITestAbfsClient.getMockAbfsClient( fs.getAbfsStore().getClient(), fs.getAbfsStore().getAbfsConfiguration()); + AbfsClientHandler clientHandler = Mockito.mock(AbfsClientHandler.class); + when(clientHandler.getIngressClient()).thenReturn(mockClient); + when(clientHandler.getClient(Mockito.any())).thenReturn(mockClient); AzureBlobFileSystemStore abfsStore = fs.getAbfsStore(); + abfsStore = setAzureBlobSystemStoreField(abfsStore, "clientHandler", clientHandler); abfsStore = setAzureBlobSystemStoreField(abfsStore, "client", mockClient); boolean isNamespaceEnabled = abfsStore .getIsNamespaceEnabled(getTestTracingContext(fs, false)); @@ -424,7 +457,7 @@ public void testNegativeScenariosForCreateOverwriteDisabled() .when(mockClient) .createPath(any(String.class), eq(true), eq(false), any(AzureBlobFileSystemStore.Permissions.class), any(boolean.class), eq(null), any(), - any(TracingContext.class)); + any(TracingContext.class), any(boolean.class)); doThrow(fileNotFoundResponseEx) // Scn1: GFS fails with Http404 .doThrow(serverErrorResponseEx) // Scn2: GFS fails with Http500 @@ -442,7 +475,7 @@ public void testNegativeScenariosForCreateOverwriteDisabled() .when(mockClient) .createPath(any(String.class), eq(true), eq(true), any(AzureBlobFileSystemStore.Permissions.class), any(boolean.class), eq(null), any(), - any(TracingContext.class)); + any(TracingContext.class), any(boolean.class)); // Scn1: GFS fails with Http404 // Sequence of events expected: @@ -505,7 +538,7 @@ private void validateCreateFileException(final Class ex FsAction.ALL); FsPermission umask = new FsPermission(FsAction.NONE, FsAction.NONE, FsAction.NONE); - Path testPath = new Path("testFile"); + Path testPath = new Path("/testFile"); intercept( exceptionClass, () -> abfsStore.createFile(testPath, null, true, permission, umask, @@ -515,4 +548,403 @@ private void validateCreateFileException(final Class ex private AbfsRestOperationException getMockAbfsRestOperationException(int status) { return new AbfsRestOperationException(status, "", "", new Exception()); } + + /** + * Creating subdirectory on existing file path should fail. + * @throws Exception + */ + @Test + public void testMkdirsFailsForSubdirectoryOfExistingFile() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + fs.create(new Path("a/b/c")); + fs.mkdirs(new Path("a/b/d")); + intercept(IOException.class, () -> fs.mkdirs(new Path("a/b/c/d/e"))); + + assertTrue(fs.exists(new Path("a/b/c"))); + assertTrue(fs.exists(new Path("a/b/d"))); + // Asserting directory created still exists as explicit. + FileStatus status = fs.getAbfsStore().getFileStatus(fs.makeQualified(new Path("a/b/d")), + new TracingContext(getTestTracingContext(fs, true))); + Assert.assertTrue("Path is not an explicit directory", status.isDirectory()); + } + + /** + * Try creating file same as an existing directory. 
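+   * The create call must fail while the directory remains explicit.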
+   * @throws Exception
+   */
+  @Test
+  public void testCreateDirectoryAndFile() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    fs.mkdirs(new Path("a/b/c"));
+    assertTrue(fs.exists(new Path("a/b/c")));
+    intercept(IOException.class, () -> fs.create(new Path("a/b/c")));
+    // Asserting that directory still exists as explicit
+    FileStatus status = fs.getAbfsStore().getFileStatus(fs.makeQualified(new Path("a/b/c")),
+        new TracingContext(getTestTracingContext(fs, true)));
+    Assert.assertTrue("Path is not an explicit directory", status.isDirectory());
+  }
+
+  /**
+   * Creating same file without specifying overwrite.
+   * @throws Exception
+   */
+  @Test
+  public void testCreateSameFile() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    fs.create(new Path("a/b/c"));
+    fs.create(new Path("a/b/c"));
+    assertTrue("File does not exist", fs.exists(new Path("a/b/c")));
+  }
+
+  /**
+   * Creating same file with overwrite flag set to false.
+   * @throws Exception
+   */
+  @Test
+  public void testCreateSameFileWithOverwriteFalse() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    fs.create(new Path("a/b/c"));
+    assertTrue(fs.exists(new Path("a/b/c")));
+    intercept(IOException.class, () -> fs.create(new Path("a/b/c"), false));
+  }
+
+  /**
+   * Creation of already existing subpath should fail.
+   * @throws Exception
+   */
+  @Test
+  public void testCreateSubPath() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    fs.create(new Path("a/b/c"));
+    assertTrue(fs.exists(new Path("a/b/c")));
+    intercept(IOException.class, () -> fs.create(new Path("a/b")));
+  }
+
+  /**
+   * Creating path with parent explicit.
+   */
+  @Test
+  public void testCreatePathParentExplicit() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    fs.mkdirs(new Path("a/b/c"));
+    assertTrue(fs.exists(new Path("a/b/c")));
+    fs.create(new Path("a/b/c/d"));
+    assertTrue(fs.exists(new Path("a/b/c/d")));
+
+    // asserting that parent stays explicit
+    FileStatus status = fs.getAbfsStore().getFileStatus(fs.makeQualified(new Path("a/b/c")),
+        new TracingContext(getTestTracingContext(fs, true)));
+    Assert.assertTrue("Path is not an explicit directory", status.isDirectory());
+  }
+
+  /**
+   * Tests that create file fails when the parent is an existing file.
+   * @throws Exception FileAlreadyExists for blob and IOException for dfs.
+   */
+  @Test
+  public void testCreateFileParentFile() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+
+    String parentName = "/testParentFile";
+    Path parent = new Path(parentName);
+    fs.create(parent);
+
+    String childName = "/testParentFile/testChildFile";
+    Path child = new Path(childName);
+    intercept(IOException.class, () -> fs.create(child, false));
+
+    // asserting that the parent remains a file
+    FileStatus status = fs.getAbfsStore().getFileStatus(fs.makeQualified(new Path(parentName)),
+        new TracingContext(getTestTracingContext(fs, true)));
+    Assert.assertFalse("Path is not a file", status.isDirectory());
+  }
+
+  /**
+   * Creating directory on existing file path should fail.
+   * @throws Exception
+   */
+  @Test
+  public void testCreateMkdirs() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    fs.create(new Path("a/b/c"));
+    intercept(IOException.class, () -> fs.mkdirs(new Path("a/b/c/d")));
+  }
+
+  /**
+   * Test mkdirs.
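+   * Directories created level by level should all exist and remain explicit.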
+ * @throws Exception + */ + @Test + public void testMkdirs() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + fs.mkdirs(new Path("a/b")); + fs.mkdirs(new Path("a/b/c/d")); + fs.mkdirs(new Path("a/b/c/e")); + + assertTrue(fs.exists(new Path("a/b"))); + assertTrue(fs.exists(new Path("a/b/c/d"))); + assertTrue(fs.exists(new Path("a/b/c/e"))); + + //Asserting that directories created as explicit + FileStatus status = fs.getAbfsStore().getFileStatus(fs.makeQualified(new Path("a/b")), + new TracingContext(getTestTracingContext(fs, true))); + Assert.assertTrue("Path is not an explicit directory", status.isDirectory()); + FileStatus status1 = fs.getAbfsStore().getFileStatus(fs.makeQualified(new Path("a/b/c/d")), + new TracingContext(getTestTracingContext(fs, true))); + Assert.assertTrue("Path is not an explicit directory", status1.isDirectory()); + FileStatus status2 = fs.getAbfsStore().getFileStatus(fs.makeQualified(new Path("a/b/c/e")), + new TracingContext(getTestTracingContext(fs, true))); + Assert.assertTrue("Path is not an explicit directory", status2.isDirectory()); + } + + /** + * Creating subpath of directory path should fail. + * @throws Exception + */ + @Test + public void testMkdirsCreateSubPath() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + fs.mkdirs(new Path("a/b/c")); + assertTrue(fs.exists(new Path("a/b/c"))); + intercept(IOException.class, () -> fs.create(new Path("a/b"))); + + //Asserting that directories created as explicit + FileStatus status2 = fs.getAbfsStore().getFileStatus(fs.makeQualified(new Path("a/b/c")), + new TracingContext(getTestTracingContext(fs, true))); + Assert.assertTrue("Path is not an explicit directory", status2.isDirectory()); + } + + /** + * Test creation of directory by level. + * @throws Exception + */ + @Test + public void testMkdirsByLevel() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + fs.mkdirs(new Path("a")); + fs.mkdirs(new Path("a/b/c")); + fs.mkdirs(new Path("a/b/c/d/e")); + + assertTrue(fs.exists(new Path("a"))); + assertTrue(fs.exists(new Path("a/b/c"))); + assertTrue(fs.exists(new Path("a/b/c/d/e"))); + + //Asserting that directories created as explicit + FileStatus status = fs.getAbfsStore().getFileStatus(fs.makeQualified(new Path("a/")), + new TracingContext(getTestTracingContext(fs, true))); + Assert.assertTrue("Path is not an explicit directory", status.isDirectory()); + FileStatus status1 = fs.getAbfsStore().getFileStatus(fs.makeQualified(new Path("a/b/c")), + new TracingContext(getTestTracingContext(fs, true))); + Assert.assertTrue("Path is not an explicit directory", status1.isDirectory()); + FileStatus status2 = fs.getAbfsStore().getFileStatus(fs.makeQualified(new Path("a/b/c/d/e")), + new TracingContext(getTestTracingContext(fs, true))); + Assert.assertTrue("Path is not an explicit directory", status2.isDirectory()); + } + + /* + Delete part of a path and validate sub path exists. + */ + @Test + public void testMkdirsWithDelete() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + fs.mkdirs(new Path("a/b")); + fs.mkdirs(new Path("a/b/c/d")); + fs.delete(new Path("a/b/c/d")); + fs.getFileStatus(new Path("a/b/c")); + assertTrue(fs.exists(new Path("a/b/c"))); + } + + /** + * Verify mkdir and rename of parent. 
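+   * After the destination subtree is deleted, rename should move the source directory into place with its child file intact.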
+   */
+  @Test
+  public void testMkdirsWithRename() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    fs.mkdirs(new Path("a/b/c/d"));
+    fs.create(new Path("e/file"));
+    fs.delete(new Path("a/b/c/d"));
+    assertTrue(fs.rename(new Path("e"), new Path("a/b/c/d")));
+    assertTrue(fs.exists(new Path("a/b/c/d/file")));
+  }
+
+  /**
+   * Create a file with name /dir1 and then mkdirs for /dir1/dir2 should fail.
+   * @throws Exception
+   */
+  @Test
+  public void testFileCreateMkdirsRoot() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    fs.setWorkingDirectory(new Path("/"));
+    final Path p1 = new Path("dir1");
+    fs.create(p1);
+    intercept(IOException.class, () -> fs.mkdirs(new Path("dir1/dir2")));
+  }
+
+  /**
+   * Create a file with name dir1 under the default working directory and then
+   * mkdirs for dir1/dir2 should fail.
+   * @throws Exception
+   */
+  @Test
+  public void testFileCreateMkdirsNonRoot() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    final Path p1 = new Path("dir1");
+    fs.create(p1);
+    intercept(IOException.class, () -> fs.mkdirs(new Path("dir1/dir2")));
+  }
+
+  /**
+   * Creation of same directory without overwrite flag should pass.
+   * @throws Exception
+   */
+  @Test
+  public void testCreateSameDirectory() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    fs.mkdirs(new Path("a/b/c"));
+    fs.mkdirs(new Path("a/b/c"));
+
+    assertTrue(fs.exists(new Path("a/b/c")));
+    // Asserting that directories created as explicit
+    FileStatus status = fs.getAbfsStore().getFileStatus(fs.makeQualified(new Path("a/b/c")),
+        new TracingContext(getTestTracingContext(fs, true)));
+    Assert.assertTrue("Path is not an explicit directory", status.isDirectory());
+  }
+
+  /**
+   * Creating a directory on a path where a file already exists should fail.
+   * @throws Exception
+   */
+  @Test
+  public void testCreateSamePathDirectory() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    fs.create(new Path("a"));
+    intercept(IOException.class, () -> fs.mkdirs(new Path("a")));
+  }
+
+  /**
+   * Creation of directory with root as parent.
+   */
+  @Test
+  public void testMkdirOnRootAsParent() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    final Path path = new Path("a");
+    fs.setWorkingDirectory(new Path("/"));
+    fs.mkdirs(path);
+
+    // Asserting that the directory created by mkdir exists as explicit.
+    FileStatus status = fs.getAbfsStore().getFileStatus(fs.makeQualified(new Path("a")),
+        new TracingContext(getTestTracingContext(fs, true)));
+    Assert.assertTrue("Path is not an explicit directory", status.isDirectory());
+  }
+
+  /**
+   * Creation of directory on root.
+   */
+  @Test
+  public void testMkdirOnRoot() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    final Path path = new Path("/");
+    fs.setWorkingDirectory(new Path("/"));
+    fs.mkdirs(path);
+
+    FileStatus status = fs.getAbfsStore().getFileStatus(fs.makeQualified(new Path("/")),
+        new TracingContext(getTestTracingContext(fs, true)));
+    Assert.assertTrue("Path is not an explicit directory", status.isDirectory());
+  }
+
+  /**
+   * Creation of directory on path with unicode chars.
+   */
+  @Test
+  public void testMkdirUnicode() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    final Path path = new Path("/dir\u0031");
+    fs.mkdirs(path);
+
+    // Asserting that the directory created by mkdir exists as explicit.
+    FileStatus status = fs.getAbfsStore().getFileStatus(fs.makeQualified(path),
+        new TracingContext(getTestTracingContext(fs, true)));
+    Assert.assertTrue("Path is not an explicit directory", status.isDirectory());
+  }
+
+  /**
+   * Creation of directory on same path with parallel threads.
+   */
+  @Test
+  public void testMkdirParallelRequests() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    final Path path = new Path("/dir1");
+
+    ExecutorService es = Executors.newFixedThreadPool(3);
+
+    List<CompletableFuture<Void>> tasks = new ArrayList<>();
+
+    for (int i = 0; i < 3; i++) {
+      CompletableFuture<Void> future = CompletableFuture.runAsync(() -> {
+        try {
+          fs.mkdirs(path);
+        } catch (IOException e) {
+          throw new CompletionException(e);
+        }
+      }, es);
+      tasks.add(future);
+    }
+
+    // Wait for all the tasks to complete
+    CompletableFuture.allOf(tasks.toArray(new CompletableFuture[0])).join();
+    es.shutdown();
+
+    // Assert that the directory created by mkdir exists as explicit
+    FileStatus status = fs.getAbfsStore().getFileStatus(fs.makeQualified(path),
+        new TracingContext(getTestTracingContext(fs, true)));
+    Assert.assertTrue("Path is not an explicit directory", status.isDirectory());
+  }
+
+  /**
+   * Creating an existing directory with mkdir overwrite disabled should still
+   * succeed, matching the DFS code path.
+   * @throws Exception
+   */
+  @Test
+  public void testCreateSameDirectoryOverwriteFalse() throws Exception {
+    Configuration configuration = getRawConfiguration();
+    configuration.setBoolean(FS_AZURE_ENABLE_MKDIR_OVERWRITE, false);
+    AzureBlobFileSystem fs1 = (AzureBlobFileSystem) FileSystem.newInstance(configuration);
+    fs1.mkdirs(new Path("a/b/c"));
+    fs1.mkdirs(new Path("a/b/c"));
+
+    // Asserting that directories created as explicit
+    FileStatus status = fs1.getAbfsStore().getFileStatus(fs1.makeQualified(new Path("a/b/c")),
+        new TracingContext(getTestTracingContext(fs1, true)));
+    Assert.assertTrue("Path is not an explicit directory", status.isDirectory());
+  }
+
+  /**
+   * Try creating directory same as an existing file.
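+   * mkdirs on the existing file path must fail with an IOException.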
+ */ + @Test + public void testCreateDirectoryAndFileRecreation() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + fs.mkdirs(new Path("a/b/c")); + fs.create(new Path("a/b/c/d")); + assertTrue(fs.exists(new Path("a/b/c"))); + assertTrue(fs.exists(new Path("a/b/c/d"))); + intercept(IOException.class, () -> fs.mkdirs(new Path("a/b/c/d"))); + } + + @Test + public void testCreateNonRecursiveForAtomicDirectoryFile() throws Exception { + AzureBlobFileSystem fileSystem = getFileSystem(); + fileSystem.setWorkingDirectory(new Path("/")); + fileSystem.mkdirs(new Path("/hbase/dir")); + fileSystem.createFile(new Path("/hbase/dir/file")) + .overwrite(false) + .replication((short) 1) + .bufferSize(1024) + .blockSize(1024) + .build(); + Assert.assertTrue(fileSystem.exists(new Path("/hbase/dir/file"))); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemE2E.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemE2E.java index f1673a3b38b45..859f3219c8d0e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemE2E.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemE2E.java @@ -23,7 +23,10 @@ import java.util.Arrays; import java.util.Random; +import org.apache.hadoop.fs.azurebfs.constants.AbfsServiceType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidAbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.assertj.core.api.Assertions; import org.junit.Test; @@ -198,6 +201,7 @@ public void testReadWithFileNotFoundException() throws Exception { public void testWriteWithFileNotFoundException() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); final Path testFilePath = path(methodName.getMethodName()); + AbfsClient client = fs.getAbfsStore().getClientHandler().getIngressClient(); try (FSDataOutputStream stream = fs.create(testFilePath)) { assertPathExists(fs, "Path should exist", testFilePath); @@ -206,8 +210,11 @@ public void testWriteWithFileNotFoundException() throws Exception { fs.delete(testFilePath, true); assertPathDoesNotExist(fs, "This path should not exist", testFilePath); - // trigger append call - intercept(FileNotFoundException.class, () -> stream.close()); + if (client instanceof AbfsDfsClient) { + intercept(FileNotFoundException.class, stream::close); + } else { + intercept(IOException.class, stream::close); + } } } @@ -215,6 +222,7 @@ public void testWriteWithFileNotFoundException() throws Exception { public void testFlushWithFileNotFoundException() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); final Path testFilePath = path(methodName.getMethodName()); + AbfsClient client = fs.getAbfsStore().getClientHandler().getIngressClient(); if (fs.getAbfsStore().isAppendBlobKey(fs.makeQualified(testFilePath).toString())) { return; } @@ -225,7 +233,11 @@ public void testFlushWithFileNotFoundException() throws Exception { fs.delete(testFilePath, true); assertPathDoesNotExist(fs, "This path should not exist", testFilePath); - intercept(FileNotFoundException.class, () -> stream.close()); + if (client instanceof AbfsDfsClient) { + intercept(FileNotFoundException.class, () -> stream.close()); + } else { + stream.close(); + } } } diff --git 
a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java index 4b961f56066df..271c7f67308c3 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java @@ -27,8 +27,10 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.constants.AbfsServiceType; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsLease; import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; @@ -38,6 +40,9 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.LambdaTestUtils; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.CONDITION_NOT_MET; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_LEASE_EXPIRED_BLOB; +import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_NO_LEASE_ID_SPECIFIED_BLOB; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyString; @@ -136,12 +141,15 @@ public void testSubDir() throws Exception { public void testTwoCreate() throws Exception { final Path testFilePath = new Path(path(methodName.getMethodName()), TEST_FILE); final AzureBlobFileSystem fs = getCustomFileSystem(testFilePath.getParent(), 1); + AbfsClient client = fs.getAbfsStore().getClientHandler().getIngressClient(); assumeValidTestConfigPresent(getRawConfiguration(), FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT); fs.mkdirs(testFilePath.getParent()); try (FSDataOutputStream out = fs.create(testFilePath)) { LambdaTestUtils.intercept(IOException.class, isHNSEnabled ? ERR_PARALLEL_ACCESS_DETECTED - : ERR_NO_LEASE_ID_SPECIFIED, () -> { + : client instanceof AbfsBlobClient + ? ERR_NO_LEASE_ID_SPECIFIED_BLOB + : ERR_NO_LEASE_ID_SPECIFIED, () -> { try (FSDataOutputStream out2 = fs.create(testFilePath)) { } return "Expected second create on infinite lease dir to fail"; @@ -151,6 +159,7 @@ public void testTwoCreate() throws Exception { } private void twoWriters(AzureBlobFileSystem fs, Path testFilePath, boolean expectException) throws Exception { + AbfsClient client = fs.getAbfsStore().getClientHandler().getIngressClient(); try (FSDataOutputStream out = fs.create(testFilePath)) { try (FSDataOutputStream out2 = fs.append(testFilePath)) { out2.writeInt(2); @@ -163,7 +172,23 @@ private void twoWriters(AzureBlobFileSystem fs, Path testFilePath, boolean expec } } out.writeInt(1); - out.hsync(); + try { + out.hsync(); + } catch (IOException e) { + // Etag mismatch leads to condition not met error for blob endpoint. + if (client instanceof AbfsBlobClient) { + GenericTestUtils.assertExceptionContains(CONDITION_NOT_MET, e); + } else { + throw e; + } + } + } catch (IOException e) { + // Etag mismatch leads to condition not met error for blob endpoint. 
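+      // The DFS endpoint surfaces the original failure, so rethrow for non-Blob clients.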
+ if (client instanceof AbfsBlobClient) { + GenericTestUtils.assertExceptionContains(CONDITION_NOT_MET, e); + } else { + throw e; + } } Assert.assertTrue("Store leases were not freed", fs.getAbfsStore().areLeasesFreed()); @@ -210,6 +235,7 @@ public void testLeaseFreedOnClose() throws Exception { public void testWriteAfterBreakLease() throws Exception { final Path testFilePath = new Path(path(methodName.getMethodName()), TEST_FILE); final AzureBlobFileSystem fs = getCustomFileSystem(testFilePath.getParent(), 1); + AbfsClient client = fs.getAbfsStore().getClientHandler().getIngressClient(); fs.mkdirs(testFilePath.getParent()); FSDataOutputStream out; @@ -222,14 +248,15 @@ public void testWriteAfterBreakLease() throws Exception { FSOperationType.BREAK_LEASE, false, 0)); fs.breakLease(testFilePath); fs.registerListener(null); - - LambdaTestUtils.intercept(IOException.class, ERR_LEASE_EXPIRED, () -> { + LambdaTestUtils.intercept(IOException.class, client instanceof AbfsBlobClient + ? ERR_LEASE_EXPIRED_BLOB : ERR_LEASE_EXPIRED, () -> { out.write(1); out.hsync(); return "Expected exception on write after lease break but got " + out; }); - LambdaTestUtils.intercept(IOException.class, ERR_LEASE_EXPIRED, () -> { + LambdaTestUtils.intercept(IOException.class, client instanceof AbfsBlobClient + ? ERR_LEASE_EXPIRED_BLOB : ERR_LEASE_EXPIRED, () -> { out.close(); return "Expected exception on close after lease break but got " + out; }); @@ -249,14 +276,15 @@ public void testWriteAfterBreakLease() throws Exception { public void testLeaseFreedAfterBreak() throws Exception { final Path testFilePath = new Path(path(methodName.getMethodName()), TEST_FILE); final AzureBlobFileSystem fs = getCustomFileSystem(testFilePath.getParent(), 1); + AbfsClient client = fs.getAbfsStore().getClientHandler().getIngressClient(); fs.mkdirs(testFilePath.getParent()); FSDataOutputStream out = fs.create(testFilePath); out.write(0); fs.breakLease(testFilePath); - - LambdaTestUtils.intercept(IOException.class, ERR_LEASE_EXPIRED, () -> { + LambdaTestUtils.intercept(IOException.class, client instanceof AbfsBlobClient + ? 
ERR_LEASE_EXPIRED_BLOB : ERR_LEASE_EXPIRED, () -> { out.close(); return "Expected exception on close after lease break but got " + out; }); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java index bc6f35c66bc53..42df1f1fefcf0 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java @@ -27,6 +27,9 @@ import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsClientHandler; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_MKDIR_OVERWRITE; @@ -123,6 +126,7 @@ public void testCreateDirOverwrite(boolean enableConditionalCreateOverwrite) fs.mkdirs(dirPath); // One request to server + AbfsClient client = fs.getAbfsStore().getClientHandler().getIngressClient(); mkdirRequestCount++; assertAbfsStatistics( diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestSharedKeyAuth.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestSharedKeyAuth.java index fedddcc4b16fb..718de9e2d824c 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestSharedKeyAuth.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestSharedKeyAuth.java @@ -17,10 +17,13 @@ */ package org.apache.hadoop.fs.azurebfs; +import java.io.IOException; + import org.junit.Assume; import org.junit.Test; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AuthType; @@ -40,7 +43,7 @@ public void testWithWrongSharedKey() throws Exception { Assume.assumeTrue(this.getAuthType() == AuthType.SharedKey); Configuration config = this.getRawConfiguration(); config.setBoolean(AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, - false); + true); String accountName = this.getAccountName(); String configkKey = FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME + "." + accountName; // a wrong sharedKey @@ -48,15 +51,13 @@ public void testWithWrongSharedKey() throws Exception { + "+MJHS7UJNDER+jn6KP6Jnm2ONQlm=="; config.set(configkKey, secret); - AbfsClient abfsClient = this.getFileSystem(config).getAbfsClient(); - intercept(AbfsRestOperationException.class, - "\"Server failed to authenticate the request. Make sure the value of " - + "Authorization header is formed correctly including the " - + "signature.\", 403", - () -> { - abfsClient - .getAclStatus("/", getTestTracingContext(getFileSystem(), false)); - }); + intercept(IOException.class, + "\"Server failed to authenticate the request. 
Make sure the value of " + + "Authorization header is formed correctly including the " + + "signature.\", 403", + () -> { + FileSystem.newInstance(config); + }); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestSmallWriteOptimization.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestSmallWriteOptimization.java index fce2b682f580a..1cdb7c3330193 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestSmallWriteOptimization.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestSmallWriteOptimization.java @@ -35,6 +35,10 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; +import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsClientHandler; +import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.BYTES_SENT; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; @@ -384,6 +388,8 @@ private void formulateSmallWriteTestAppendPattern(final AzureBlobFileSystem fs, long expectedBytesSent = fs.getInstrumentationMap() .get(BYTES_SENT.getStatName()); + AbfsClient client = fs.getAbfsStore().getClientHandler().getIngressClient(); + while (testIteration > 0) { // trigger recurringWriteSize appends over numOfWrites writeBufferCursor += executeWritePattern(opStream, writeBuffer, @@ -426,7 +432,9 @@ private void formulateSmallWriteTestAppendPattern(final AzureBlobFileSystem fs, ? 1 // 1 append (with flush and close param) : (wasDataPendingToBeWrittenToServer) ? 2 // 1 append + 1 flush (with close) - : 1); // 1 flush (with close) + : (recurringWriteSize == 0 && client instanceof AbfsBlobClient) + ? 0 // no flush or close on prefix mode blob + : 1); // 1 flush (with close) expectedTotalRequestsMade += totalAppendFlushCalls; expectedRequestsMadeWithData += totalAppendFlushCalls; @@ -445,10 +453,19 @@ private void formulateSmallWriteTestAppendPattern(final AzureBlobFileSystem fs, testIteration--; } + /** + * The test iteration loop above executes one of the two patterns below: + * 1. Append + Close (triggers flush) + * 2. Append + Flush
+ * For both patterns, the PutBlockList completes within the iteration loop itself. + * Hence, with PrefixMode Blob, the close below won't trigger any network call. + */ opStream.close(); - expectedTotalRequestsMade += 1; - expectedRequestsMadeWithData += 1; + if (client instanceof AbfsDfsClient) { + expectedTotalRequestsMade += 1; + expectedRequestsMadeWithData += 1; + } // no change in expectedBytesSent assertOpStats(fs.getInstrumentationMap(), expectedTotalRequestsMade, expectedRequestsMadeWithData, expectedBytesSent); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestTracingContext.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestTracingContext.java index 3ffa2bd49e427..be0cae9e37c0c 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestTracingContext.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestTracingContext.java @@ -111,7 +111,7 @@ public void checkCorrelationConfigValidation(String clientCorrelationId, //request should not fail for invalid clientCorrelationID AbfsRestOperation op = fs.getAbfsClient() .createPath(path, false, true, permissions, false, null, null, - tracingContext); + tracingContext, isNamespaceEnabled); int statusCode = op.getResult().getStatusCode(); Assertions.assertThat(statusCode).describedAs("Request should not fail")
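Reviewer note on the hunk above: createPath now threads the already-resolved namespace flag through as a trailing argument instead of re-resolving it per request. Below is a minimal sketch of the updated call shape, assuming the hadoop-azure test classpath plus pre-existing fs, path, permissions and tracingContext objects from the surrounding test; the hard-coded flag value and the status-code bound are illustrative assumptions, not part of the patch.

    // Sketch only: isNamespaceEnabled would normally come from the test fixture.
    boolean isNamespaceEnabled = true; // assumption: HNS-enabled account under test
    AbfsRestOperation op = fs.getAbfsClient()
        .createPath(path, false, true, permissions, false, null, null,
            tracingContext, isNamespaceEnabled);
    // Any status below 400 counts as "did not fail" for this check.
    Assertions.assertThat(op.getResult().getStatusCode())
        .describedAs("Request should not fail")
        .isLessThan(java.net.HttpURLConnection.HTTP_BAD_REQUEST);

The design point is simply that call sites which already know the namespace state pass it down, saving the extra GetAclStatus/HEAD round trip inside the client.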
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsOutputStream.java index 359846ce14dae..195a6e070cc73 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsOutputStream.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsOutputStream.java @@ -20,6 +20,8 @@ import java.io.FileNotFoundException; import java.io.IOException; +import java.io.OutputStream; +import java.net.ProtocolException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; @@ -27,6 +29,7 @@ import org.assertj.core.api.Assertions; import org.junit.Test; import org.mockito.Mockito; +import org.mockito.stubbing.Answer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -36,9 +39,12 @@ import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.test.LambdaTestUtils; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EXPECT_100_JDK_ERROR; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; /** * Test create operation. @@ -163,15 +169,29 @@ public void testExpect100ContinueFailureInAppend() throws Exception { Path path = new Path("/testFile"); AbfsOutputStream os = Mockito.spy( (AbfsOutputStream) fs.create(path).getWrappedStream()); - AbfsClient spiedClient = Mockito.spy(os.getClient()); + AzureIngressHandler ingressHandler = Mockito.spy( + os.getIngressHandler()); + Mockito.doReturn(ingressHandler).when(os).getIngressHandler(); + + AbfsClient spiedClient = Mockito.spy(ingressHandler.getClient()); + Mockito.doReturn(spiedClient).when(ingressHandler).getClient(); + AbfsHttpOperation[] httpOpForAppendTest = new AbfsHttpOperation[2]; mockSetupForAppend(httpOpForAppendTest, spiedClient); Mockito.doReturn(spiedClient).when(os).getClient(); fs.delete(path, true); os.write(1); - LambdaTestUtils.intercept(FileNotFoundException.class, () -> { - os.close(); - }); + if (spiedClient instanceof AbfsDfsClient) { + LambdaTestUtils.intercept(FileNotFoundException.class, () -> { + os.close(); + }); + } else { + IOException ex = LambdaTestUtils.intercept(IOException.class, () -> { + os.close(); + }); + Assertions.assertThat(ex.getCause().getCause()).isInstanceOf( + AbfsRestOperationException.class); + } Assertions.assertThat(httpOpForAppendTest[0].getConnectionDisconnectedOnError()) .describedAs("First try from AbfsClient will have expect-100 " + "header and should fail with expect-100 error.").isTrue(); @@ -195,9 +215,23 @@ private void mockSetupForAppend(final AbfsHttpOperation[] httpOpForAppendTest, Mockito.doAnswer(abfsRestOpAppendGetInvocation -> { AbfsRestOperation op = Mockito.spy( (AbfsRestOperation) abfsRestOpAppendGetInvocation.callRealMethod()); + boolean[] isExpectCall = new boolean[1]; + for (AbfsHttpHeader header : op.getRequestHeaders()) { + if (header.getName().equals(EXPECT)) { + isExpectCall[0] = true; + } + } Mockito.doAnswer(createHttpOpInvocation -> { httpOpForAppendTest[index[0]] = Mockito.spy( (AbfsHttpOperation) createHttpOpInvocation.callRealMethod()); + if (isExpectCall[0]) { + Mockito.doAnswer(getConnOs -> { + OutputStream os = (OutputStream) getConnOs.callRealMethod(); + os.write(1); + os.close(); + throw new ProtocolException(EXPECT_100_JDK_ERROR); + }).when(httpOpForAppendTest[index[0]]).getConnOutputStream(); + } return httpOpForAppendTest[index[0]++]; }).when(op).createHttpOperation(); return op; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStream.java index f0987b5fd75ab..b4a7607a0fe34 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStream.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStream.java @@ -73,7 +73,7 @@ private AbfsOutputStreamContext populateAbfsOutputStreamContext( boolean disableOutputStreamFlush, boolean isAppendBlob, boolean isExpectHeaderEnabled, - AbfsClient client, + AbfsClientHandler clientHandler, String path, TracingContext tracingContext, ExecutorService executorService) throws IOException, @@ -97,7 +97,7 @@ private AbfsOutputStreamContext populateAbfsOutputStreamContext( .withAppendBlob(isAppendBlob) .withWriteMaxConcurrentRequestCount(abfsConf.getWriteMaxConcurrentRequestCount()) .withMaxWriteRequestsToQueue(abfsConf.getMaxWriteRequestsToQueue()) - .withClient(client) + .withClientHandler(clientHandler) .withPath(path) .withTracingContext(tracingContext) 
.withExecutorService(executorService) @@ -111,7 +111,8 @@ private AbfsOutputStreamContext populateAbfsOutputStreamContext( @Test public void verifyShortWriteRequest() throws Exception { - AbfsClient client = mock(AbfsClient.class); + AbfsClientHandler clientHandler = mock(AbfsClientHandler.class); + AbfsDfsClient client = mock(AbfsDfsClient.class); AbfsRestOperation op = mock(AbfsRestOperation.class); AbfsConfiguration abfsConf; final Configuration conf = new Configuration(); @@ -125,7 +126,8 @@ public void verifyShortWriteRequest() throws Exception { .thenReturn(op); when(client.flush(anyString(), anyLong(), anyBoolean(), anyBoolean(), any(), isNull(), any(), any(TracingContext.class))).thenReturn(op); - + when(clientHandler.getClient(any())).thenReturn(client); + when(clientHandler.getDfsClient()).thenReturn(client); AbfsOutputStream out = new AbfsOutputStream( populateAbfsOutputStreamContext( BUFFER_SIZE, @@ -133,7 +135,7 @@ public void verifyShortWriteRequest() throws Exception { false, false, true, - client, + clientHandler, PATH, new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", FSOperationType.WRITE, abfsConf.getTracingHeaderFormat(), @@ -172,8 +174,8 @@ public void verifyShortWriteRequest() throws Exception { */ @Test public void verifyWriteRequest() throws Exception { - - AbfsClient client = mock(AbfsClient.class); + AbfsClientHandler clientHandler = mock(AbfsClientHandler.class); + AbfsDfsClient client = mock(AbfsDfsClient.class); AbfsRestOperation op = mock(AbfsRestOperation.class); AbfsConfiguration abfsConf; final Configuration conf = new Configuration(); @@ -187,7 +189,8 @@ public void verifyWriteRequest() throws Exception { when(client.getAbfsPerfTracker()).thenReturn(tracker); when(client.append(anyString(), any(byte[].class), any(AppendRequestParameters.class), any(), any(), any(TracingContext.class))).thenReturn(op); when(client.flush(anyString(), anyLong(), anyBoolean(), anyBoolean(), any(), isNull(), any(), any(TracingContext.class))).thenReturn(op); - + when(clientHandler.getClient(any())).thenReturn(client); + when(clientHandler.getDfsClient()).thenReturn(client); AbfsOutputStream out = new AbfsOutputStream( populateAbfsOutputStreamContext( BUFFER_SIZE, @@ -195,7 +198,7 @@ public void verifyWriteRequest() throws Exception { false, false, true, - client, + clientHandler, PATH, tracingContext, createExecutorService(abfsConf))); @@ -242,8 +245,8 @@ public void verifyWriteRequest() throws Exception { */ @Test public void verifyWriteRequestOfBufferSizeAndClose() throws Exception { - - AbfsClient client = mock(AbfsClient.class); + AbfsClientHandler clientHandler = mock(AbfsClientHandler.class); + AbfsDfsClient client = mock(AbfsDfsClient.class); AbfsRestOperation op = mock(AbfsRestOperation.class); AbfsHttpOperation httpOp = mock(AbfsHttpOperation.class); AbfsConfiguration abfsConf; @@ -260,7 +263,8 @@ public void verifyWriteRequestOfBufferSizeAndClose() throws Exception { when(client.flush(anyString(), anyLong(), anyBoolean(), anyBoolean(), any(), isNull(), any(), any(TracingContext.class))).thenReturn(op); when(op.getSasToken()).thenReturn("testToken"); when(op.getResult()).thenReturn(httpOp); - + when(clientHandler.getClient(any())).thenReturn(client); + when(clientHandler.getDfsClient()).thenReturn(client); AbfsOutputStream out = new AbfsOutputStream( populateAbfsOutputStreamContext( BUFFER_SIZE, @@ -268,7 +272,7 @@ public void verifyWriteRequestOfBufferSizeAndClose() throws Exception { false, false, true, - client, + clientHandler, PATH, 
tracingContext, createExecutorService(abfsConf))); @@ -315,8 +319,8 @@ public void verifyWriteRequestOfBufferSizeAndClose() throws Exception { */ @Test public void verifyWriteRequestOfBufferSize() throws Exception { - - AbfsClient client = mock(AbfsClient.class); + AbfsClientHandler clientHandler = mock(AbfsClientHandler.class); + AbfsDfsClient client = mock(AbfsDfsClient.class); AbfsRestOperation op = mock(AbfsRestOperation.class); AbfsHttpOperation httpOp = mock(AbfsHttpOperation.class); AbfsConfiguration abfsConf; @@ -333,7 +337,8 @@ public void verifyWriteRequestOfBufferSize() throws Exception { any(), isNull(), any(), any(TracingContext.class))).thenReturn(op); when(op.getSasToken()).thenReturn("testToken"); when(op.getResult()).thenReturn(httpOp); - + when(clientHandler.getClient(any())).thenReturn(client); + when(clientHandler.getDfsClient()).thenReturn(client); AbfsOutputStream out = new AbfsOutputStream( populateAbfsOutputStreamContext( BUFFER_SIZE, @@ -341,7 +346,7 @@ public void verifyWriteRequestOfBufferSize() throws Exception { false, false, true, - client, + clientHandler, PATH, new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", FSOperationType.WRITE, abfsConf.getTracingHeaderFormat(), @@ -374,8 +379,8 @@ public void verifyWriteRequestOfBufferSize() throws Exception { */ @Test public void verifyWriteRequestOfBufferSizeWithAppendBlob() throws Exception { - - AbfsClient client = mock(AbfsClient.class); + AbfsClientHandler clientHandler = mock(AbfsClientHandler.class); + AbfsDfsClient client = mock(AbfsDfsClient.class); AbfsRestOperation op = mock(AbfsRestOperation.class); AbfsConfiguration abfsConf; final Configuration conf = new Configuration(); @@ -389,7 +394,8 @@ public void verifyWriteRequestOfBufferSizeWithAppendBlob() throws Exception { .thenReturn(op); when(client.flush(anyString(), anyLong(), anyBoolean(), anyBoolean(), any(), isNull(), any(), any(TracingContext.class))).thenReturn(op); - + when(clientHandler.getClient(any())).thenReturn(client); + when(clientHandler.getDfsClient()).thenReturn(client); AbfsOutputStream out = new AbfsOutputStream( populateAbfsOutputStreamContext( BUFFER_SIZE, @@ -397,7 +403,7 @@ public void verifyWriteRequestOfBufferSizeWithAppendBlob() throws Exception { false, true, true, - client, + clientHandler, PATH, new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", FSOperationType.OPEN, abfsConf.getTracingHeaderFormat(), @@ -430,8 +436,8 @@ public void verifyWriteRequestOfBufferSizeWithAppendBlob() throws Exception { */ @Test public void verifyWriteRequestOfBufferSizeAndHFlush() throws Exception { - - AbfsClient client = mock(AbfsClient.class); + AbfsClientHandler clientHandler = mock(AbfsClientHandler.class); + AbfsDfsClient client = mock(AbfsDfsClient.class); AbfsRestOperation op = mock(AbfsRestOperation.class); when(op.getSasToken()).thenReturn(""); AbfsConfiguration abfsConf; @@ -449,7 +455,8 @@ public void verifyWriteRequestOfBufferSizeAndHFlush() throws Exception { .thenReturn(op); when(client.flush(anyString(), anyLong(), anyBoolean(), anyBoolean(), any(), isNull(), any(), any(TracingContext.class))).thenReturn(op); - + when(clientHandler.getClient(any())).thenReturn(client); + when(clientHandler.getDfsClient()).thenReturn(client); AbfsOutputStream out = new AbfsOutputStream( populateAbfsOutputStreamContext( BUFFER_SIZE, @@ -457,7 +464,7 @@ public void verifyWriteRequestOfBufferSizeAndHFlush() throws Exception { false, false, true, - client, + clientHandler, PATH, new 
TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", FSOperationType.OPEN, abfsConf.getTracingHeaderFormat(), @@ -505,8 +512,8 @@ public void verifyWriteRequestOfBufferSizeAndHFlush() throws Exception { */ @Test public void verifyWriteRequestOfBufferSizeAndFlush() throws Exception { - - AbfsClient client = mock(AbfsClient.class); + AbfsClientHandler clientHandler = mock(AbfsClientHandler.class); + AbfsDfsClient client = mock(AbfsDfsClient.class); AbfsRestOperation op = mock(AbfsRestOperation.class); AbfsConfiguration abfsConf; final Configuration conf = new Configuration(); @@ -519,7 +526,8 @@ public void verifyWriteRequestOfBufferSizeAndFlush() throws Exception { .thenReturn(op); when(client.flush(anyString(), anyLong(), anyBoolean(), anyBoolean(), any(), isNull(), any(), any(TracingContext.class))).thenReturn(op); - + when(clientHandler.getClient(any())).thenReturn(client); + when(clientHandler.getDfsClient()).thenReturn(client); AbfsOutputStream out = new AbfsOutputStream( populateAbfsOutputStreamContext( BUFFER_SIZE, @@ -527,7 +535,7 @@ public void verifyWriteRequestOfBufferSizeAndFlush() throws Exception { false, false, true, - client, + clientHandler, PATH, new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", FSOperationType.WRITE, abfsConf.getTracingHeaderFormat(), diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java index 683528b9c54d1..8ab8d07d7287d 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java @@ -22,9 +22,14 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; + +import org.junit.Assume; import org.junit.Test; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_DELETE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_KEY; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_NAME; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_URI; import static org.apache.hadoop.fs.azurebfs.services.AbfsRestOperationType.DeletePath; import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; @@ -39,6 +44,19 @@ public class TestAbfsRestOperation extends public TestAbfsRestOperation() throws Exception { } + private void checkPrerequisites() throws Exception { + checkIfConfigIsSet(FS_AZURE_METRIC_ACCOUNT_NAME); + checkIfConfigIsSet(FS_AZURE_METRIC_ACCOUNT_KEY); + checkIfConfigIsSet(FS_AZURE_METRIC_URI); + } + + private void checkIfConfigIsSet(String configKey) { + AbfsConfiguration conf = getConfiguration(); + String value = conf.get(configKey); + Assume.assumeTrue(configKey + " config is mandatory for the test to run", + value != null && value.trim().length() > 1); + } +
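An aside on running this suite: the new checkPrerequisites() gate skips testBackoffRetryMetrics unless a metric sink is configured. A hedged sketch of what would satisfy it is below; only the three config keys come from the patch, and all values are placeholders.

    Configuration conf = getRawConfiguration();
    // Placeholders; checkIfConfigIsSet only requires a non-trivial value.
    conf.set(FS_AZURE_METRIC_ACCOUNT_NAME, "metricaccount.dfs.core.windows.net");
    conf.set(FS_AZURE_METRIC_ACCOUNT_KEY, "<metric-account-key>");
    conf.set(FS_AZURE_METRIC_URI, "https://metricaccount.dfs.core.windows.net");
    // With any key unset, Assume.assumeTrue(...) marks the test as skipped
    // rather than failed, which keeps default CI runs green.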
/** * Test for backoff retry metrics. * @@ -49,6 +67,7 @@ public TestAbfsRestOperation() throws Exception { */ @Test public void testBackoffRetryMetrics() throws Exception { + checkPrerequisites(); // Create an AzureBlobFileSystem instance. final Configuration configuration = getRawConfiguration(); configuration.set(FS_AZURE_METRIC_FORMAT, String.valueOf(MetricFormat.INTERNAL_BACKOFF_METRIC_FORMAT)); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DelegationSASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DelegationSASGenerator.java index 6f2209a6e8ced..64aed941c1f2c 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DelegationSASGenerator.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DelegationSASGenerator.java @@ -60,6 +60,7 @@ public String getDelegationSAS(String accountName, String containerName, String case SASTokenProvider.CREATE_DIRECTORY_OPERATION: case SASTokenProvider.WRITE_OPERATION: case SASTokenProvider.SET_PROPERTIES_OPERATION: + case SASTokenProvider.APPEND_BLOCK_OPERATION: sp = "w"; break; case SASTokenProvider.DELETE_OPERATION: @@ -78,6 +79,10 @@ public String getDelegationSAS(String accountName, String containerName, String case SASTokenProvider.LIST_OPERATION: sp = "l"; break; + case SASTokenProvider.LIST_BLOB_OPERATION: + sp = "l"; + sr = "c"; + break; case SASTokenProvider.GET_PROPERTIES_OPERATION: case SASTokenProvider.READ_OPERATION: sp = "r"; @@ -97,6 +102,10 @@ public String getDelegationSAS(String accountName, String containerName, String throw new IllegalArgumentException(operation); } + if (operation.equals(SASTokenProvider.APPEND_BLOCK_OPERATION)) { + sp += "a"; + } + String signature = computeSignatureForSAS(sp, st, se, sv, sr, accountName, containerName, path, saoid, suoid, scid);
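For clarity, the net effect of the two new cases: AppendBlock is signed like the other write-class operations and then additionally granted the add permission, while blob listing is signed at container scope. A minimal standalone sketch of just that mapping (it mirrors the switch above and is not the generator's full signing path):

    import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider;

    // Sketch: permission string per operation, as added by this patch.
    static String permissionsFor(String operation) {
      String sp;
      switch (operation) {
        case SASTokenProvider.APPEND_BLOCK_OPERATION:
          sp = "w";  // signed with write, like the other write-class operations
          break;
        case SASTokenProvider.LIST_BLOB_OPERATION:
          sp = "l";  // list permission; sr is set to "c" (container scope) alongside
          break;
        default:
          throw new IllegalArgumentException(operation);
      }
      if (SASTokenProvider.APPEND_BLOCK_OPERATION.equals(operation)) {
        sp += "a";   // AppendBlock also needs the add permission, yielding "wa"
      }
      return sp;
    }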
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java index 7569c80d67c61..4e09e4b899728 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java @@ -38,6 +38,8 @@ public class TracingHeaderValidator implements Listener { private TracingHeaderFormat format; private static final String GUID_PATTERN = "^[0-9a-fA-F]{8}-([0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}$"; + private String ingressHandler = null; + private String position = null; @Override public void callTracingHeaderValidator(String tracingContextHeader, @@ -52,6 +54,8 @@ public TracingHeaderValidator getClone() { clientCorrelationId, fileSystemId, operation, needsPrimaryRequestId, retryNum, streamID); tracingHeaderValidator.primaryRequestId = primaryRequestId; + tracingHeaderValidator.ingressHandler = ingressHandler; + tracingHeaderValidator.position = position; return tracingHeaderValidator; } @@ -92,8 +96,13 @@ private void validateTracingHeader(String tracingContextHeader) { private void validateBasicFormat(String[] idList) { if (format == TracingHeaderFormat.ALL_ID_FORMAT) { + int expectedSize = 7; + if (ingressHandler != null) { + expectedSize += 2; + } Assertions.assertThat(idList) - .describedAs("header should have 7 elements").hasSize(7); + .describedAs("header should have " + expectedSize + " elements") + .hasSize(expectedSize); } else if (format == TracingHeaderFormat.TWO_ID_FORMAT) { Assertions.assertThat(idList) .describedAs("header should have 2 elements").hasSize(2); @@ -152,4 +161,14 @@ public void setOperation(FSOperationType operation) { public void updatePrimaryRequestID(String primaryRequestId) { this.primaryRequestId = primaryRequestId; } + + @Override + public void updateIngressHandler(String ingressHandler) { + this.ingressHandler = ingressHandler; + } + + @Override + public void updatePosition(String position) { + this.position = position; + } }
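The validator change above is worth restating: the ALL_ID_FORMAT header grows from 7 to 9 fields once the ingress handler and position are being reported. A minimal sketch of the size check, assuming the ':'-delimited header format used by TracingContext:

    // Sketch of the expected-size logic from validateBasicFormat above.
    String[] idList = tracingContextHeader.split(":"); // assumption: ':' delimiter
    int expectedSize = 7;
    if (ingressHandler != null) {
      expectedSize += 2; // one extra field each for ingress handler and position
    }
    Assertions.assertThat(idList)
        .describedAs("header should have " + expectedSize + " elements")
        .hasSize(expectedSize);

Keeping the two extra fields optional means existing DFS-only test runs, which never call updateIngressHandler, still validate against the original 7-element header.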