Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[core] Introduce encryption interface for paimon #2903

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions docs/layouts/shortcodes/generated/catalog_configuration.html
Original file line number Diff line number Diff line change
Expand Up @@ -92,5 +92,17 @@
<td>String</td>
<td>The warehouse root path of catalog.</td>
</tr>
<tr>
<td><h5>encryption.mechanism</h5></td>
<td style="word-wrap: break-word;">plaintext</td>
<td>Enum</td>
<td>Encryption mechanism for paimon, the default value is plaintext, which means it is not encrypted.<br /><br />Possible values:<ul><li>"plaintext": Do not encrypt the data files.</li><li>"envelope": Encrypt data file using envelope encryption mechanism.</li></ul></td>
</tr>
<tr>
<td><h5>encryption.kms-client</h5></td>
<td style="word-wrap: break-word;">(none)</td>
<td>Enum</td>
<td>The kms client for encryption, if the user has enabled encryption, the kms client must be specified.<br /><br />Possible values:<ul><li>"memory": Use memory kms for encryption, this is only for test, can not be used in production environment.</li><li>"hadoop": Use hadoop kms for encryption, parameters prefixed with `hadoop.security.` will be used to build hadoop kms client, the `hadoop.security.key.provider.path` is required. The hadoop parameters can be configured from catalog or environment,please refer to `https://paimon.apache.org/docs/master/filesystems/hdfs/#hdfs-configuration`.</li></ul></td>
</tr>
</tbody>
</table>
18 changes: 18 additions & 0 deletions docs/layouts/shortcodes/generated/core_configuration.html
Original file line number Diff line number Diff line change
Expand Up @@ -726,5 +726,23 @@
<td>Integer</td>
<td>The bytes of types (CHAR, VARCHAR, BINARY, VARBINARY) devote to the zorder sort.</td>
</tr>
<tr>
<td><h5>encryption.table.key-id</h5></td>
<td style="word-wrap: break-word;">(none)</td>
<td>String</td>
<td>Specify the master key id for encryption.</td>
</tr>
<tr>
<td><h5>encryption.columns</h5></td>
<td style="word-wrap: break-word;">(none)</td>
<td>String</td>
<td>Specify the partial columns to be encrypted, separated by commas. If this parameter is not specified, all columns will be encrypted.</td>
</tr>
<tr>
<td><h5>encryption.algorithm</h5></td>
<td style="word-wrap: break-word;">(none)</td>
<td>String</td>
<td>Encryption algorithm for encrypting data files, for parquet format, the value can be `AES_GCM_V1` or `AES_GCM_CTR_V1`, and the default value is `AES_GCM_CTR_V1`. And can not specify algorithm for orc format now.</td>
</tr>
</tbody>
</table>
28 changes: 28 additions & 0 deletions paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -1076,6 +1076,26 @@ public class CoreOptions implements Serializable {
+ "If the data size allocated for the sorting task is uneven,which may lead to performance bottlenecks, "
+ "the config can be set to size.");

/**
 * Option {@code encryption.table.key-id}: the master key id used for encryption. A string
 * option declared without a default value.
 */
public static final ConfigOption<String> ENCRYPTION_TABLE_KEY_ID =
key("encryption.table.key-id")
.stringType()
.noDefaultValue()
.withDescription("Specify the master key id for encryption.");

/**
 * Option {@code encryption.columns}: comma-separated names of the columns to encrypt. A string
 * option declared without a default value; per its description, leaving it unset means all
 * columns are encrypted.
 */
public static final ConfigOption<String> ENCRYPTION_COLUMNS =
        key("encryption.columns")
                .stringType()
                .noDefaultValue()
                .withDescription(
                        "Specify the partial columns to be encrypted, separated by commas. "
                                + "If this parameter is not specified, all columns will be encrypted.");

/**
 * Option {@code encryption.algorithm}: the algorithm used to encrypt data files. A string
 * option declared without a default value.
 *
 * <p>NOTE(review): the description states a default of {@code AES_GCM_CTR_V1} for parquet, but
 * the option itself declares none; presumably the fallback is applied by the format writer -
 * confirm.
 */
public static final ConfigOption<String> ENCRYPTION_ALGORITHM =
        key("encryption.algorithm")
                .stringType()
                .noDefaultValue()
                .withDescription(
                        "Encryption algorithm for encrypting data files, for parquet format, "
                                + "the value can be `AES_GCM_V1` or `AES_GCM_CTR_V1`, "
                                + "and the default value is `AES_GCM_CTR_V1`. "
                                + "And can not specify algorithm for orc format now.");

public static final ConfigOption<Integer> SORT_COMPACTION_SAMPLE_MAGNIFICATION =
key("sort-compaction.local-sample.magnification")
.intType()
Expand Down Expand Up @@ -1690,6 +1710,14 @@ public boolean deletionVectorsEnabled() {
return options.get(DELETION_VECTORS_ENABLED);
}

/** Returns the value that {@code options} holds for {@link #ENCRYPTION_ALGORITHM}. */
public String encryptionAlgorithm() {
return options.get(ENCRYPTION_ALGORITHM);
}

/**
 * Returns the value that {@code options} holds for {@link #ENCRYPTION_COLUMNS}, as the raw
 * string; it is not split into individual column names here.
 */
public String encryptionColumns() {
return options.get(ENCRYPTION_COLUMNS);
}

/** Specifies the merge engine for table with primary key. */
public enum MergeEngine implements DescribedEnum {
DEDUPLICATE("deduplicate", "De-duplicate and keep the last row."),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.encryption;

import org.apache.paimon.options.Options;

import java.io.Closeable;
import java.io.Serializable;

/** KMS client interface, provide common operations for KMS. */
public interface KmsClient extends Serializable, Closeable {
/**
 * Hands the client its configuration.
 *
 * <p>NOTE(review): presumably called once before {@code wrapKey}/{@code unwrapKey}; which
 * option keys are read depends on the implementation - confirm.
 *
 * @param options the options to configure this client with
 */
void configure(Options options);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a line above


/**
 * Returns the identifier of this KMS client.
 *
 * <p>NOTE(review): presumably matched against the {@code encryption.kms-client} option value
 * to select an implementation - confirm against the lookup code.
 */
String identifier();

/**
 * Wraps a key.
 *
 * <p>NOTE(review): presumably encrypts {@code unWrappedKey} under the master key identified by
 * {@code masterKeyId} and returns the wrapped bytes - confirm against implementations.
 *
 * @param unWrappedKey the key bytes to wrap
 * @param masterKeyId id of the master key
 * @return the wrapped key bytes
 */
byte[] wrapKey(byte[] unWrappedKey, String masterKeyId);

/**
 * Unwraps a key.
 *
 * <p>NOTE(review): presumably the inverse of {@code wrapKey}, recovering the original key
 * bytes using the master key identified by {@code masterKeyId} - confirm against
 * implementations.
 *
 * @param wrappedKey the wrapped key bytes
 * @param masterKeyId id of the master key
 * @return the unwrapped key bytes
 */
byte[] unwrapKey(byte[] wrappedKey, String masterKeyId);
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,12 @@ public Optional<TableStatsExtractor> createStatsExtractor(

/**
 * Creates a {@link FileFormat} for {@code identifier}, using {@code options} as the format
 * options and a fixed read batch size of 1024. All other {@link FormatContext} attributes are
 * left unset by the builder.
 *
 * @param identifier the format identifier
 * @param options the format options handed to the format
 * @return the file format resolved by {@code fromIdentifier(String, FormatContext)}
 */
@VisibleForTesting
public static FileFormat fromIdentifier(String identifier, Options options) {
    FormatContext formatContext =
            FileFormatFactory.formatContextBuilder()
                    .formatOptions(options)
                    .readBatchSize(1024)
                    .build();
    return fromIdentifier(identifier, formatContext);
}

/** Create a {@link FileFormat} from format identifier and format options. */
Expand Down Expand Up @@ -105,8 +110,11 @@ private static Optional<FileFormat> fromIdentifier(

/**
 * Creates the {@link FileFormat} for {@code formatIdentifier} from table-level options.
 *
 * <p>The read batch size is taken from {@link CoreOptions#READ_BATCH_SIZE}; the format options
 * are {@code options.removePrefix(formatIdentifier + ".")}.
 *
 * @param options the options to read the batch size and format options from
 * @param formatIdentifier the format identifier, also used to build the option key prefix
 * @return the file format resolved by {@code FileFormat.fromIdentifier}
 */
public static FileFormat getFileFormat(Options options, String formatIdentifier) {
    int readBatchSize = options.get(CoreOptions.READ_BATCH_SIZE);
    FormatContext formatContext =
            FileFormatFactory.formatContextBuilder()
                    .formatOptions(options.removePrefix(formatIdentifier + "."))
                    .readBatchSize(readBatchSize)
                    .build();
    return FileFormat.fromIdentifier(formatIdentifier, formatContext);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,34 @@ public interface FileFormatFactory {

FileFormat create(FormatContext formatContext);

/** the format context. */
/** The format context for reader and writer. */
class FormatContext {
private final Options formatOptions;
private final int readBatchSize;
private final String compression;
private final String encryptionTableKeyId;
private final byte[] plaintextDataKey;
private final byte[] dataAADPrefix;
private final String encryptionAlgorithm;
private final String encryptionColumns;

/**
 * Creates a context holding the given values as-is; the byte arrays are stored without
 * copying. Private: instances are obtained through {@code FormatContextBuilder#build()} or
 * {@code newDataKey(byte[])}.
 */
private FormatContext(
        Options formatOptions,
        int readBatchSize,
        String compression,
        String encryptionTableKeyId,
        byte[] plaintextDataKey,
        byte[] dataAADPrefix,
        String encryptionAlgorithm,
        String encryptionColumns) {
    this.formatOptions = formatOptions;
    this.readBatchSize = readBatchSize;
    this.compression = compression;
    this.encryptionTableKeyId = encryptionTableKeyId;
    this.plaintextDataKey = plaintextDataKey;
    this.dataAADPrefix = dataAADPrefix;
    this.encryptionAlgorithm = encryptionAlgorithm;
    this.encryptionColumns = encryptionColumns;
}

public Options formatOptions() {
Expand All @@ -44,5 +64,112 @@ public Options formatOptions() {
public int readBatchSize() {
return readBatchSize;
}

/** Returns the compression value this context was created with. */
public String compression() {
return compression;
}

/**
 * Returns the value of the {@code encryptionTableKeyId} field.
 *
 * <p>NOTE(review): the accessor name omits "Key" while the backing field is named
 * {@code encryptionTableKeyId}; consider aligning the names.
 */
public String encryptionTableId() {
return encryptionTableKeyId;
}

/**
 * Returns the plaintext data key held by this context. The internal array is returned
 * directly, without copying, so modifications made by the caller are visible to this context.
 */
public byte[] plaintextDataKey() {
return plaintextDataKey;
}

/**
 * Returns the data AAD prefix held by this context (presumably the "additional authenticated
 * data" prefix used by the encryption layer - confirm). The internal array is returned
 * directly, without copying.
 */
public byte[] dataAADPrefix() {
return dataAADPrefix;
}

/** Returns the encryption algorithm value this context was created with. */
public String encryptionAlgorithm() {
return encryptionAlgorithm;
}

/** Returns the encryption columns value this context was created with, as a raw string. */
public String encryptionColumns() {
return encryptionColumns;
}

/**
 * Returns a new {@code FormatContext} that carries the same values as this one except that its
 * plaintext data key is {@code dataKey}. This context is left unchanged.
 *
 * @param dataKey the plaintext data key for the new context; stored without copying
 * @return the new context
 */
public FormatContext newDataKey(byte[] dataKey) {
return new FormatContext(
formatOptions,
readBatchSize,
compression,
encryptionTableKeyId,
dataKey,
dataAADPrefix,
encryptionAlgorithm,
encryptionColumns);
}
}

/**
 * Fluent builder for {@link FormatContext}.
 *
 * <p>Every setter stores the given value and returns this builder. Attributes that are never
 * set keep their Java defaults ({@code null}, or {@code 0} for the read batch size) and are
 * passed on unchanged by {@link #build()}.
 */
class FormatContextBuilder {

    private Options formatOptions;
    private int readBatchSize;
    private String compression;
    private String encryptionTableKeyId;
    private byte[] plaintextDataKey;
    private byte[] dataAADPrefix;
    private String encryptionAlgorithm;
    private String encryptionColumns;

    /** Not instantiable directly; private constructor. */
    private FormatContextBuilder() {}

    /** Sets the format options. */
    public FormatContextBuilder formatOptions(final Options formatOptions) {
        this.formatOptions = formatOptions;
        return this;
    }

    /** Sets the read batch size. */
    public FormatContextBuilder readBatchSize(final int readBatchSize) {
        this.readBatchSize = readBatchSize;
        return this;
    }

    /** Sets the compression value. */
    public FormatContextBuilder compression(final String compression) {
        this.compression = compression;
        return this;
    }

    /** Sets the encryption table key id. */
    public FormatContextBuilder withEncryptionTableId(final String keyId) {
        this.encryptionTableKeyId = keyId;
        return this;
    }

    /** Sets the plaintext data key; the array is stored without copying. */
    public FormatContextBuilder withPlaintextDataKey(final byte[] plaintextDataKey) {
        this.plaintextDataKey = plaintextDataKey;
        return this;
    }

    /** Sets the data AAD prefix; the array is stored without copying. */
    public FormatContextBuilder withAADPrefix(final byte[] dataAADPrefix) {
        this.dataAADPrefix = dataAADPrefix;
        return this;
    }

    /** Sets the encryption algorithm value. */
    public FormatContextBuilder withEncryptionAlgorithm(final String encryptionAlgorithm) {
        this.encryptionAlgorithm = encryptionAlgorithm;
        return this;
    }

    /** Sets the encryption columns value. */
    public FormatContextBuilder withEncryptionColumns(final String encryptionColumns) {
        this.encryptionColumns = encryptionColumns;
        return this;
    }

    /** Creates a {@link FormatContext} from the values collected so far, without validation. */
    public FormatContext build() {
        return new FormatContext(
                formatOptions,
                readBatchSize,
                compression,
                encryptionTableKeyId,
                plaintextDataKey,
                dataAADPrefix,
                encryptionAlgorithm,
                encryptionColumns);
    }
}

/** Returns a new, empty {@link FormatContextBuilder}. */
static FormatContextBuilder formatContextBuilder() {
return new FormatContextBuilder();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,17 @@

package org.apache.paimon.options;

import org.apache.paimon.options.description.DescribedEnum;
import org.apache.paimon.options.description.Description;
import org.apache.paimon.options.description.InlineElement;
import org.apache.paimon.options.description.TextElement;
import org.apache.paimon.table.TableType;

import java.time.Duration;

import static org.apache.paimon.options.CatalogOptions.EncryptionMechanism.PLAINTEXT;
import static org.apache.paimon.options.ConfigOptions.key;
import static org.apache.paimon.options.description.TextElement.text;

/** Options for catalog. */
public class CatalogOptions {
Expand Down Expand Up @@ -110,4 +114,72 @@ public class CatalogOptions {
TextElement.text(
"\"custom\": You can implement LineageMetaFactory and LineageMeta to store lineage information in customized storage."))
.build());

/**
 * Option {@code encryption.mechanism}: the encryption mechanism, as an
 * {@link EncryptionMechanism}. Defaults to {@code PLAINTEXT}.
 */
public static final ConfigOption<EncryptionMechanism> ENCRYPTION_MECHANISM =
        key("encryption.mechanism")
                .enumType(EncryptionMechanism.class)
                .defaultValue(PLAINTEXT)
                .withDescription(
                        "Encryption mechanism for paimon, the default value is plaintext, "
                                + "which means it is not encrypted.");

/**
 * Option {@code encryption.kms-client}: the KMS client used for encryption, as an
 * {@link EncryptionKmsClient}. Declared without a default value.
 */
public static final ConfigOption<EncryptionKmsClient> ENCRYPTION_KMS_CLIENT =
        key("encryption.kms-client")
                .enumType(EncryptionKmsClient.class)
                .noDefaultValue()
                .withDescription(
                        "The kms client for encryption, if the user has enabled encryption, "
                                + "the kms client must be specified.");

/**
 * Encryption mechanisms selectable through {@code encryption.mechanism}. Each constant carries
 * the string returned by {@link #toString()} and a description text.
 */
public enum EncryptionMechanism implements DescribedEnum {
    PLAINTEXT("plaintext", "Do not encrypt the data files."),
    ENVELOPE("envelope", "Encrypt data file using envelope encryption mechanism.");

    private final String label;
    private final String summary;

    EncryptionMechanism(final String value, final String description) {
        this.label = value;
        this.summary = description;
    }

    /** Returns the string value of this mechanism, e.g. {@code "plaintext"}. */
    @Override
    public String toString() {
        return label;
    }

    /** Returns the description of this mechanism wrapped in a text element. */
    @Override
    public InlineElement getDescription() {
        return text(summary);
    }
}

/**
 * KMS clients selectable through {@code encryption.kms-client}. Each constant carries the
 * string returned by {@link #toString()} and a description text.
 */
public enum EncryptionKmsClient implements DescribedEnum {
    MEMORY(
            "memory",
            "Use memory kms for encryption, this is only for test, "
                    + "can not be used in production environment."),
    HADOOP(
            "hadoop",
            "Use hadoop kms for encryption, "
                    + "parameters prefixed with `hadoop.security.` will be used to build hadoop kms client, "
                    + "the `hadoop.security.key.provider.path` is required. "
                    + "The hadoop parameters can be configured from catalog or environment,"
                    + "please refer to "
                    + "`https://paimon.apache.org/docs/master/filesystems/hdfs/#hdfs-configuration`.");

    private final String label;
    private final String summary;

    EncryptionKmsClient(final String value, final String description) {
        this.label = value;
        this.summary = description;
    }

    /** Returns the string value of this client, e.g. {@code "hadoop"}. */
    @Override
    public String toString() {
        return label;
    }

    /** Returns the description of this client wrapped in a text element. */
    @Override
    public InlineElement getDescription() {
        return text(summary);
    }
}
}
Loading