Skip to content

Commit

Permalink
HADOOP-18850. S3A: Enable dual-layer server-side encryption with AWS …
Browse files Browse the repository at this point in the history
…KMS keys (#6140)


Contributed by Viraj Jasani
  • Loading branch information
virajjasani authored Nov 1, 2023
1 parent 4c04a67 commit cf3a4b3
Show file tree
Hide file tree
Showing 14 changed files with 473 additions and 73 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1724,14 +1724,14 @@
<name>fs.s3a.encryption.algorithm</name>
<description>Specify a server-side encryption or client-side
encryption algorithm for s3a: file system. Unset by default. It supports the
following values: 'AES256' (for SSE-S3), 'SSE-KMS', 'SSE-C', and 'CSE-KMS'
following values: 'AES256' (for SSE-S3), 'SSE-KMS', 'DSSE-KMS', 'SSE-C', and 'CSE-KMS'
</description>
</property>

<property>
<name>fs.s3a.encryption.key</name>
<description>Specific encryption key to use if fs.s3a.encryption.algorithm
has been set to 'SSE-KMS', 'SSE-C' or 'CSE-KMS'. In the case of SSE-C
has been set to 'SSE-KMS', 'DSSE-KMS', 'SSE-C' or 'CSE-KMS'. In the case of SSE-C
, the value of this property should be the Base64 encoded key. If you are
using SSE-KMS and leave this property empty, you'll be using your default
S3 KMS key, otherwise you should set this property to the specific KMS key
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,14 @@ public enum S3AEncryptionMethods {
SSE_KMS("SSE-KMS", true, false),
SSE_C("SSE-C", true, true),
CSE_KMS("CSE-KMS", false, true),
CSE_CUSTOM("CSE-CUSTOM", false, true);
CSE_CUSTOM("CSE-CUSTOM", false, true),
DSSE_KMS("DSSE-KMS", true, false);

/**
* Error string when {@link #getMethod(String)} fails.
* Used in tests.
*/
static final String UNKNOWN_ALGORITHM
public static final String UNKNOWN_ALGORITHM
= "Unknown encryption algorithm ";

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1440,6 +1440,11 @@ public static EncryptionSecrets buildEncryptionSecrets(String bucket,
diagnostics);
break;

case DSSE_KMS:
LOG.debug("Using DSSE-KMS with {}",
diagnostics);
break;

case NONE:
default:
LOG.debug("Data is unencrypted");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ public static Optional<String> getSSECustomerKey(final EncryptionSecrets secrets
* @return an optional key to attach to a request.
*/
public static Optional<String> getSSEAwsKMSKey(final EncryptionSecrets secrets) {
if (secrets.getEncryptionMethod() == S3AEncryptionMethods.SSE_KMS
if ((secrets.getEncryptionMethod() == S3AEncryptionMethods.SSE_KMS
|| secrets.getEncryptionMethod() == S3AEncryptionMethods.DSSE_KMS)
&& secrets.hasEncryptionKey()) {
return Optional.of(secrets.getEncryptionKey());
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets;

import static org.apache.commons.lang3.StringUtils.isNotEmpty;
import static org.apache.hadoop.fs.s3a.S3AEncryptionMethods.UNKNOWN_ALGORITHM;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DEFAULT_UPLOAD_PART_COUNT_LIMIT;
import static org.apache.hadoop.util.Preconditions.checkArgument;
import static org.apache.hadoop.util.Preconditions.checkNotNull;
Expand Down Expand Up @@ -273,24 +274,38 @@ protected void copyEncryptionParameters(HeadObjectResponse srcom,
return;
}

if (S3AEncryptionMethods.SSE_S3 == algorithm) {
switch (algorithm) {
case SSE_S3:
copyObjectRequestBuilder.serverSideEncryption(algorithm.getMethod());
} else if (S3AEncryptionMethods.SSE_KMS == algorithm) {
break;
case SSE_KMS:
copyObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
// Set the KMS key if present, else S3 uses AWS managed key.
EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets)
.ifPresent(kmsKey -> copyObjectRequestBuilder.ssekmsKeyId(kmsKey));
} else if (S3AEncryptionMethods.SSE_C == algorithm) {
.ifPresent(copyObjectRequestBuilder::ssekmsKeyId);
break;
case DSSE_KMS:
copyObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS_DSSE);
EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets)
.ifPresent(copyObjectRequestBuilder::ssekmsKeyId);
break;
case SSE_C:
EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets)
.ifPresent(base64customerKey -> {
copyObjectRequestBuilder.copySourceSSECustomerAlgorithm(
ServerSideEncryption.AES256.name()).copySourceSSECustomerKey(base64customerKey)
.copySourceSSECustomerKeyMD5(
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)))
.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
.sseCustomerKey(base64customerKey).sseCustomerKeyMD5(
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)));
});
.ifPresent(base64customerKey -> copyObjectRequestBuilder
.copySourceSSECustomerAlgorithm(ServerSideEncryption.AES256.name())
.copySourceSSECustomerKey(base64customerKey)
.copySourceSSECustomerKeyMD5(
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)))
.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
.sseCustomerKey(base64customerKey).sseCustomerKeyMD5(
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))));
break;
case CSE_KMS:
case CSE_CUSTOM:
case NONE:
break;
default:
LOG.warn(UNKNOWN_ALGORITHM + ": " + algorithm);
}
}
/**
Expand Down Expand Up @@ -348,20 +363,35 @@ private void putEncryptionParameters(PutObjectRequest.Builder putObjectRequestBu
final S3AEncryptionMethods algorithm
= getServerSideEncryptionAlgorithm();

if (S3AEncryptionMethods.SSE_S3 == algorithm) {
switch (algorithm) {
case SSE_S3:
putObjectRequestBuilder.serverSideEncryption(algorithm.getMethod());
} else if (S3AEncryptionMethods.SSE_KMS == algorithm) {
break;
case SSE_KMS:
putObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
// Set the KMS key if present, else S3 uses AWS managed key.
EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets)
.ifPresent(kmsKey -> putObjectRequestBuilder.ssekmsKeyId(kmsKey));
} else if (S3AEncryptionMethods.SSE_C == algorithm) {
.ifPresent(putObjectRequestBuilder::ssekmsKeyId);
break;
case DSSE_KMS:
putObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS_DSSE);
EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets)
.ifPresent(putObjectRequestBuilder::ssekmsKeyId);
break;
case SSE_C:
EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets)
.ifPresent(base64customerKey -> {
putObjectRequestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
.sseCustomerKey(base64customerKey).sseCustomerKeyMD5(
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)));
});
.ifPresent(base64customerKey -> putObjectRequestBuilder
.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
.sseCustomerKey(base64customerKey)
.sseCustomerKeyMD5(Md5Utils.md5AsBase64(
Base64.getDecoder().decode(base64customerKey))));
break;
case CSE_KMS:
case CSE_CUSTOM:
case NONE:
break;
default:
LOG.warn(UNKNOWN_ALGORITHM + ": " + algorithm);
}
}

Expand Down Expand Up @@ -409,20 +439,35 @@ private void multipartUploadEncryptionParameters(
CreateMultipartUploadRequest.Builder mpuRequestBuilder) {
final S3AEncryptionMethods algorithm = getServerSideEncryptionAlgorithm();

if (S3AEncryptionMethods.SSE_S3 == algorithm) {
switch (algorithm) {
case SSE_S3:
mpuRequestBuilder.serverSideEncryption(algorithm.getMethod());
} else if (S3AEncryptionMethods.SSE_KMS == algorithm) {
break;
case SSE_KMS:
mpuRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
// Set the KMS key if present, else S3 uses AWS managed key.
EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets)
.ifPresent(kmsKey -> mpuRequestBuilder.ssekmsKeyId(kmsKey));
} else if (S3AEncryptionMethods.SSE_C == algorithm) {
.ifPresent(mpuRequestBuilder::ssekmsKeyId);
break;
case DSSE_KMS:
mpuRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS_DSSE);
EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets)
.ifPresent(mpuRequestBuilder::ssekmsKeyId);
break;
case SSE_C:
EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets)
.ifPresent(base64customerKey -> {
mpuRequestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
.sseCustomerKey(base64customerKey).sseCustomerKeyMD5(
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)));
});
.ifPresent(base64customerKey -> mpuRequestBuilder
.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
.sseCustomerKey(base64customerKey)
.sseCustomerKeyMD5(
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))));
break;
case CSE_KMS:
case CSE_CUSTOM:
case NONE:
break;
default:
LOG.warn(UNKNOWN_ALGORITHM + ": " + algorithm);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ The server-side "SSE" encryption is performed with symmetric AES256 encryption;
S3 offers different mechanisms for actually defining the key to use.


There are four key management mechanisms, which in order of simplicity of use,
There are five key management mechanisms, which in order of simplicity of use,
are:

* S3 Default Encryption
Expand All @@ -75,6 +75,9 @@ are:
by Amazon's Key Management Service, a key referenced by name in the uploading client.
* SSE-C : the client specifies an actual base64 encoded AES-256 key to be used
to encrypt and decrypt the data.
* DSSE-KMS: Two independent layers of encryption at server side. An AES256 key is
generated in S3, and encrypted with a secret key provided by Amazon's Key Management
Service.

Encryption options

Expand All @@ -84,14 +87,15 @@ Encryption options
| `SSE-KMS` | server side, KMS key | key used to encrypt/decrypt | none |
| `SSE-C` | server side, custom key | encryption algorithm and secret | encryption algorithm and secret |
| `CSE-KMS` | client side, KMS key | encryption algorithm and key ID | encryption algorithm |
| `DSSE-KMS` | server side, KMS key | key used to encrypt/decrypt | none |

With server-side encryption, the data is uploaded to S3 unencrypted (but wrapped by the HTTPS
encryption channel).
The data is encrypted in the S3 store and decrypted when it's being retrieved.

A server side algorithm can be enabled by default for a bucket, so that
whenever data is uploaded unencrypted a default encryption algorithm is added.
When data is encrypted with S3-SSE or SSE-KMS it is transparent to all clients
When data is encrypted with S3-SSE, SSE-KMS or DSSE-KMS it is transparent to all clients
downloading the data.
SSE-C is different in that every client must know the secret key needed to decrypt the data.

Expand Down Expand Up @@ -132,7 +136,7 @@ not explicitly declare an encryption algorithm.

[S3 Default Encryption for S3 Buckets](https://docs.aws.amazon.com/AmazonS3/latest/dev/bucket-encryption.html)

This supports SSE-S3 and SSE-KMS.
This supports SSE-S3, SSE-KMS and DSSE-KMS.

There is no need to set anything up in the client: do it in the AWS console.

Expand Down Expand Up @@ -316,6 +320,82 @@ metadata. Since only one encryption key can be provided at a time, S3A will not
pass the correct encryption key to decrypt the data.


### <a name="dsse-kms"></a> DSSE-KMS: Dual-layer Server-Side Encryption with KMS Managed Encryption Keys

By providing a dual-layer server-side encryption mechanism using AWS Key Management Service
(AWS KMS) keys, known as DSSE-KMS, two layers of encryption are applied to objects upon their
upload to Amazon S3. DSSE-KMS simplifies the process of meeting compliance requirements that
mandate the implementation of multiple layers of encryption for data while maintaining complete
control over the encryption keys.


When uploading data encrypted with DSSE-KMS, the sequence is as follows:

1. The S3A client must declare a specific CMK in the property `fs.s3a.encryption.key`, or leave
it blank to use the default configured for that region.

2. The S3A client uploads all the data as normal, now including encryption information.

3. The S3 service encrypts the data with a symmetric key unique to the new object.

4. The S3 service retrieves the chosen CMK key from the KMS service, and, if the user has
the right to use it, uses it to provide dual-layer encryption for the data.


When downloading DSSE-KMS encrypted data, the sequence is as follows:

1. The S3A client issues an HTTP GET request to read the data.

2. S3 sees that the data was encrypted with DSSE-KMS, and looks up the specific key in the
KMS service.

3. If and only if the requesting user has been granted permission to use the CMK key does
the KMS service provide S3 with the key.

4. As a result, S3 will only decode the data if the user has been granted access to the key.

Further reading on DSSE-KMS [here](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingDSSEncryption.html)

AWS Blog post [here](https://aws.amazon.com/blogs/aws/new-amazon-s3-dual-layer-server-side-encryption-with-keys-stored-in-aws-key-management-service-dsse-kms/)

### Enabling DSSE-KMS

To enable DSSE-KMS, the property `fs.s3a.encryption.algorithm` must be set to `DSSE-KMS` in `core-site`:

```xml
<property>
<name>fs.s3a.encryption.algorithm</name>
<value>DSSE-KMS</value>
</property>
```

The ID of the specific key used to encrypt the data should also be set in the property `fs.s3a.encryption.key`:

```xml
<property>
<name>fs.s3a.encryption.key</name>
<value>arn:aws:kms:us-west-2:360379543683:key/071a86ff-8881-4ba0-9230-95af6d01ca01</value>
</property>
```

Organizations may define a default key in the Amazon KMS; if a default key is set,
then it will be used whenever DSSE-KMS encryption is chosen and the value of `fs.s3a.encryption.key` is empty.

### the S3A `fs.s3a.encryption.key` key only affects created files

With DSSE-KMS, the S3A client option `fs.s3a.encryption.key` sets the
key to be used when new files are created. When reading files, this key,
and indeed the value of `fs.s3a.encryption.algorithm`, is ignored:
S3 will attempt to retrieve the key and decrypt the file based on the create-time settings.

This means that

* There's no need to configure any client simply reading data.
* It is possible for a client to read data encrypted with one KMS key, and
write it with another.



## <a name="best_practises"></a> Encryption best practises


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ and rate of requests. Spreading data across different buckets, and/or using
a more balanced directory structure may be beneficial.
Consult [the AWS documentation](http://docs.aws.amazon.com/AmazonS3/latest/dev/request-rate-perf-considerations.html).

Reading or writing data encrypted with SSE-KMS forces S3 to make calls of
Reading or writing data encrypted with SSE-KMS or DSSE-KMS forces S3 to make calls of
the AWS KMS Key Management Service, which comes with its own
[Request Rate Limits](http://docs.aws.amazon.com/kms/latest/developerguide/limits.html).
These default to 1200/second for an account, across all keys and all uses of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1087,7 +1087,7 @@ The specific tests an Assumed Role ARN is required for are
To run these tests you need:

1. A role in your AWS account with full read and write access rights to
the S3 bucket used in the tests, and KMS for any SSE-KMS tests.
the S3 bucket used in the tests, and KMS for any SSE-KMS or DSSE-KMS tests.


1. Your IAM User to have the permissions to "assume" that role.
Expand Down
Loading

0 comments on commit cf3a4b3

Please sign in to comment.