Skip to content

Commit

Permalink
Added mime type check for sheet files.
Browse files Browse the repository at this point in the history
  • Loading branch information
vikasrathee-cs committed Nov 19, 2024
1 parent fbbac79 commit 0069f97
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 11 deletions.
19 changes: 12 additions & 7 deletions src/main/java/io/cdap/plugin/google/common/APIRequestRetryer.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ public abstract class APIRequestRetryer {
protected static final int LIMIT_RATE_EXCEEDED_CODE = 403;
protected static final int BACKEND_ERROR_CODE = 500;
protected static final int SERVICE_UNAVAILABLE_CODE = 503;
private static final int MAX_RETRY_WAIT = 200;
private static final int MAX_RETRY_COUNT = 8;
private static final int MAX_RETRY_WAIT = 300;
private static final int MAX_RETRY_COUNT = 10;
private static final int MAX_RETRY_JITTER_WAIT = 100;
protected static final String TOO_MANY_REQUESTS_MESSAGE = "Too Many Requests";
protected static final String LIMIT_RATE_EXCEEDED_MESSAGE = "Rate Limit Exceeded";
Expand All @@ -76,7 +76,7 @@ public <V> void onRetry(Attempt<V> attempt) {
GoogleJsonResponseException e = (GoogleJsonResponseException) exceptionCause;
LOG.warn(String.format(
"Error code: '%d', message: '%s'. Attempt: '%d'. Delay since first: '%d'. Description: '%s'.",
e.getDetails().getCode(),
getExceptionStatusCode(e),
e.getStatusMessage(),
attempt.getAttemptNumber(),
attempt.getDelaySinceFirstAttempt(),
Expand Down Expand Up @@ -125,21 +125,26 @@ private static boolean checkHttpResponseException(Throwable t) {

private static boolean isTooManyRequestsError(GoogleJsonResponseException e) {
List<String> possibleMessages = Arrays.asList(TOO_MANY_REQUESTS_MESSAGE, LIMIT_RATE_EXCEEDED_MESSAGE);
return e.getDetails().getCode() == TOO_MANY_REQUESTS_CODE && possibleMessages.contains(e.getStatusMessage());
return getExceptionStatusCode(e) == TOO_MANY_REQUESTS_CODE && possibleMessages.contains(
e.getStatusMessage());
}

private static boolean isRateLimitError(GoogleJsonResponseException e) {
return e.getDetails().getCode() == LIMIT_RATE_EXCEEDED_CODE
return getExceptionStatusCode(e) == LIMIT_RATE_EXCEEDED_CODE
&& (LIMIT_RATE_EXCEEDED_MESSAGE.equals(e.getStatusMessage())
|| e.getDetails().getMessage().contains(LIMIT_RATE_EXCEEDED_MESSAGE));
}

private static boolean isBackendError(GoogleJsonResponseException e) {
return e.getDetails().getCode() == BACKEND_ERROR_CODE;
return getExceptionStatusCode(e) == BACKEND_ERROR_CODE;
}

private static boolean isServiceUnavailableError(GoogleJsonResponseException e) {
return e.getDetails().getCode() == SERVICE_UNAVAILABLE_CODE;
return getExceptionStatusCode(e) == SERVICE_UNAVAILABLE_CODE;
}

/**
 * Resolves the status code of a Google API error.
 *
 * <p>The code from the JSON error details is preferred. When the response carried no parsable
 * details, the HTTP status code of the response is used instead, so the result is never
 * {@code null} and can safely be compared against the primitive {@code *_CODE} constants.
 *
 * @param e exception returned by the Google API call
 * @return the error code from the JSON details, or the HTTP status code if details are absent
 */
private static Integer getExceptionStatusCode(GoogleJsonResponseException e) {
  // Returning null here would make every "== SOME_CODE" comparison in the callers throw an NPE
  // during auto-unboxing, which is exactly the case this helper is meant to protect against.
  return e.getDetails() != null ? e.getDetails().getCode() : e.getStatusCode();
}

private static boolean isRateLimitError(HttpResponseException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,10 @@ public List<File> getFilesSummary(List<ExportedType> exportedTypes, int filesNum
int retrievedFiles = 0;
int actualFilesNumber = filesNumber;
if (IdentifierType.FILE_IDENTIFIER.equals(config.getIdentifierType())) {
files.add(service.files().get(config.getFileIdentifier()).setSupportsAllDrives(true).execute());
files.add(getFilesSummaryByFileId());
return files;
}

Drive.Files.List request = service.files().list()
.setSupportsAllDrives(true)
.setIncludeItemsFromAllDrives(true)
Expand All @@ -99,6 +100,10 @@ public List<File> getFilesSummary(List<ExportedType> exportedTypes, int filesNum
});
}

/**
 * Fetches the Drive file referenced by the configured file identifier, with shared-drive
 * support enabled. Subclasses may override this to add checks on the returned file.
 *
 * @return the file returned by the Drive API for the configured identifier
 * @throws IOException if the Drive request fails
 * @throws ExecutionException declared so that overriding implementations can reject the file
 */
protected File getFilesSummaryByFileId() throws IOException, ExecutionException {
  Drive.Files.Get fileRequest = service.files().get(config.getFileIdentifier());
  fileRequest.setSupportsAllDrives(true);
  return fileRequest.execute();
}

private String generateFilter(List<ExportedType> exportedTypes) throws InterruptedException {
StringBuilder sb = new StringBuilder();

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Copyright © 2024 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package io.cdap.plugin.google.sheets.source;

import com.google.api.services.drive.model.File;
import io.cdap.plugin.google.common.GoogleDriveFilteringClient;

import java.io.IOException;
import java.util.concurrent.ExecutionException;

/**
 * Drive filtering client for the Google Sheets source. When a single file is requested by its
 * identifier, the file is accepted only if its MIME type is the Google Sheets one.
 */
public class GoogleSheetsFilteringClient extends GoogleDriveFilteringClient<GoogleSheetsSourceConfig> {

  public GoogleSheetsFilteringClient(GoogleSheetsSourceConfig config) throws IOException {
    super(config);
  }

  /**
   * Fetches the file referenced by the configured file identifier and verifies that it is a
   * Google Sheets file.
   *
   * @return the file returned by the Drive API
   * @throws IOException if the Drive request fails
   * @throws ExecutionException if the file's MIME type is missing or is not
   *     {@code DRIVE_SPREADSHEETS_MIME}
   */
  @Override
  protected File getFilesSummaryByFileId() throws IOException, ExecutionException {
    // Reuse the parent lookup instead of duplicating the Drive request here.
    File file = super.getFilesSummaryByFileId();
    String mimeType = file.getMimeType();
    // Constant-first comparison: a missing MIME type is reported as "not a sheet" instead of an NPE.
    if (!DRIVE_SPREADSHEETS_MIME.equalsIgnoreCase(mimeType)) {
      // Argument order must follow the placeholders: file id first, then MIME type.
      throw new ExecutionException(
        String.format("File with id: '%s' has a MIME_TYPE '%s' and is not a Google Sheets File.",
                      config.getFileIdentifier(),
                      mimeType), null);
    }
    return file;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
GoogleSheetsInputFormatProvider.GSON.fromJson(headersJson, headersType);

// get all sheets files according to filter
GoogleDriveFilteringClient driveFilteringClient = new GoogleDriveFilteringClient(googleSheetsSourceConfig);
GoogleDriveFilteringClient driveFilteringClient = new GoogleSheetsFilteringClient(googleSheetsSourceConfig);
List<File> spreadsheetsFiles;
try {
spreadsheetsFiles = driveFilteringClient.getFilesSummary(Collections.singletonList(ExportedType.SPREADSHEETS));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ public ValidationResult validate(FailureCollector collector) {
GoogleDriveFilteringClient driveClient;
GoogleSheetsSourceClient sheetsSourceClient;
try {
driveClient = new GoogleDriveFilteringClient(this);
driveClient = new GoogleSheetsFilteringClient(this);
sheetsSourceClient = new GoogleSheetsSourceClient(this);
} catch (IOException e) {
collector.addFailure("Exception during drive and sheets connections instantiating.", null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ public static StructuredRecord transform(RowRecord rowRecord, Schema schema, boo
builder.set(metadataRecordName, rowRecord.getMetadata());
} else {
ComplexSingleValueColumn complexSingleValueColumn = rowRecord.getHeaderedCells().get(name);
if (complexSingleValueColumn.getData() == null && complexSingleValueColumn.getSubColumns().isEmpty()) {
if (complexSingleValueColumn == null || (complexSingleValueColumn.getData() == null
&& complexSingleValueColumn.getSubColumns().isEmpty())) {
builder.set(name, null);
} else {
processCellData(builder, field, complexSingleValueColumn);
Expand Down

0 comments on commit 0069f97

Please sign in to comment.