Skip to content

Commit

Permalink
Merge pull request data-integrations#1357 from data-integrations/bugf…
Browse files Browse the repository at this point in the history
…ix/plugin-1735

[PLUGIN-1735] Fix read timeout issue during GCS Bucket  copy/move actions for large files
  • Loading branch information
dj-smart authored Jan 31, 2024
2 parents b4c7d04 + c3fa512 commit 42f75fd
Show file tree
Hide file tree
Showing 9 changed files with 73 additions and 6 deletions.
4 changes: 4 additions & 0 deletions docs/GCSCopy-action.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ This value is ignored if the bucket already exists.
If the bucket already exists, this is ignored. More information can be found
[here](https://cloud.google.com/data-fusion/docs/how-to/customer-managed-encryption-keys)

**Read Timeout:** Timeout in seconds to read data from an established HTTP connection (Default value is 20).
For performing copy/move operation on large files in GCS buckets, a higher timeout might be needed. Setting it to 0
implies infinite timeout (no limit on the timeout) [NOT RECOMMENDED]

Example
-------

Expand Down
4 changes: 4 additions & 0 deletions docs/GCSMove-action.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ This value is ignored if the bucket already exists.
If the bucket already exists, this is ignored. More information can be found
[here](https://cloud.google.com/data-fusion/docs/how-to/customer-managed-encryption-keys)

**Read Timeout:** Timeout in seconds to read data from an established HTTP connection (Default value is 20).
For performing copy/move operation on large files in GCS buckets, a higher timeout might be needed. Setting it to 0
implies infinite timeout (no limit on the timeout) [NOT RECOMMENDED]

Example
-------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@ private void emitMetricsForStorageBucket(boolean succeeded, BatchSinkContext con
}
try {
StorageClient storageClient = StorageClient.create(config.getProject(), config.getServiceAccount(),
config.isServiceAccountFilePath());
config.isServiceAccountFilePath(), null);
storageClient.mapMetaDataForAllBlobs(outputPath,
new MetricsEmitter(context.getMetrics())::emitMetrics);
} catch (Exception e) {
Expand Down
9 changes: 7 additions & 2 deletions src/main/java/io/cdap/plugin/gcp/gcs/StorageClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package io.cdap.plugin.gcp.gcs;

import com.google.api.gax.paging.Page;
import com.google.cloud.http.HttpTransportOptions;
import com.google.cloud.kms.v1.CryptoKeyName;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.BlobId;
Expand Down Expand Up @@ -320,17 +321,21 @@ private static String toPath(BlobId blobId) {
}

public static StorageClient create(String project, @Nullable String serviceAccount,
Boolean isServiceAccountFilePath) throws IOException {
Boolean isServiceAccountFilePath, @Nullable Integer readTimeout)
throws IOException {
StorageOptions.Builder builder = StorageOptions.newBuilder().setProjectId(project);
if (serviceAccount != null) {
builder.setCredentials(GCPUtils.loadServiceAccountCredentials(serviceAccount, isServiceAccountFilePath));
}
if (readTimeout != null) {
builder.setTransportOptions(HttpTransportOptions.newBuilder().setReadTimeout(readTimeout * 1000).build());
}
Storage storage = builder.build().getService();
return new StorageClient(storage);
}

public static StorageClient create(GCPConnectorConfig config) throws IOException {
return create(config.getProject(), config.getServiceAccount(), config.isServiceAccountFilePath());
return create(config.getProject(), config.getServiceAccount(), config.isServiceAccountFilePath(), null);
}

/**
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/io/cdap/plugin/gcp/gcs/actions/GCSCopy.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public void run(ActionContext context) throws IOException {
return;
}
StorageClient storageClient = StorageClient.create(config.getProject(), config.getServiceAccount(),
isServiceAccountFilePath);
isServiceAccountFilePath, config.readTimeout);

GCSPath destPath = config.getDestPath();
CryptoKeyName cmekKeyName = CmekUtils.getCmekKey(config.cmekKey, context.getArguments().asMap(), collector);
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/io/cdap/plugin/gcp/gcs/actions/GCSMove.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public void run(ActionContext context) throws IOException {
return;
}
StorageClient storageClient = StorageClient.create(config.getProject(), config.getServiceAccount(),
isServiceAccountFilePath);
isServiceAccountFilePath, config.readTimeout);
GCSPath destPath = config.getDestPath();
CryptoKeyName cmekKeyName = CmekUtils.getCmekKey(config.cmekKey, context.getArguments().asMap(), collector);
collector.getOrThrowException();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ public class SourceDestConfig extends GCPConfig {
public static final String NAME_SOURCE_PATH = "sourcePath";
public static final String NAME_DEST_PATH = "destPath";
public static final String NAME_LOCATION = "location";
public static final String READ_TIMEOUT = "readTimeout";

@Name(NAME_SOURCE_PATH)
@Macro
Expand Down Expand Up @@ -74,15 +75,25 @@ public class SourceDestConfig extends GCPConfig {
" at https://cloud.google.com/data-fusion/docs/how-to/customer-managed-encryption-keys")
protected String cmekKey;

@Name(READ_TIMEOUT)
@Macro
@Nullable
@Description("Timeout in seconds to read data from an established HTTP connection (Default value is 20). " +
("For performing copy/move operation on large files in GCS buckets, set a higher value. " +
"Set it to 0 for infinite(no limit)"))
protected Integer readTimeout;

public SourceDestConfig(@Nullable String project, @Nullable String serviceAccountType,
@Nullable String serviceFilePath, @Nullable String serviceAccountJson,
@Nullable String destPath, @Nullable String location, @Nullable String cmekKey) {
@Nullable String destPath, @Nullable String location, @Nullable Integer readTimeout,
@Nullable String cmekKey) {
this.serviceAccountType = serviceAccountType;
this.serviceAccountJson = serviceAccountJson;
this.serviceFilePath = serviceFilePath;
this.project = project;
this.destPath = destPath;
this.location = location;
this.readTimeout = readTimeout;
this.cmekKey = cmekKey;
}

Expand Down Expand Up @@ -125,9 +136,22 @@ public void validate(FailureCollector collector, Map<String, String> arguments)
if (!containsMacro(NAME_CMEK_KEY)) {
validateCmekKey(collector, arguments);
}
if (!containsMacro(READ_TIMEOUT)) {
validateReadTimeout(collector);
}
collector.getOrThrowException();
}

void validateReadTimeout(FailureCollector collector) {
if (readTimeout == null) {
return;
}
if (readTimeout < 0) {
collector.addFailure("Read Timeout cannot be less than 0. ",
"Please enter 0 or a positive value.").withConfigProperty(READ_TIMEOUT);
}
}

//This method validated the pattern of CMEK Key resource ID.
void validateCmekKey(FailureCollector failureCollector, Map<String, String> arguments) {
CryptoKeyName cmekKeyName = CmekUtils.getCmekKey(cmekKey, arguments, failureCollector);
Expand Down Expand Up @@ -161,6 +185,7 @@ public static class Builder {
private String destPath;
private String cmekKey;
private String location;
private Integer readTimeout;

public SourceDestConfig.Builder setProject(@Nullable String project) {
this.project = project;
Expand Down Expand Up @@ -205,6 +230,7 @@ public SourceDestConfig build() {
serviceAccountJson,
destPath,
location,
readTimeout,
cmekKey
);
}
Expand Down
14 changes: 14 additions & 0 deletions widgets/GCSCopy-action.json
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,20 @@
}
}
]
},
{
"label" : "Advanced",
"properties" : [
{
"name": "readTimeout",
"label" : "Read Timeout",
"widget-type": "number",
"widget-attributes": {
"default": "20",
"minimum": "0"
}
}
]
}
],
"filters": [
Expand Down
14 changes: 14 additions & 0 deletions widgets/GCSMove-action.json
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,20 @@
}
}
]
},
{
"label" : "Advanced",
"properties" : [
{
"name": "readTimeout",
"label" : "Read Timeout",
"widget-type": "number",
"widget-attributes": {
"default": "20",
"minimum": "0"
}
}
]
}
],
"filters": [
Expand Down

0 comments on commit 42f75fd

Please sign in to comment.