Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Generic Importers #1389

Merged
merged 34 commits into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
fb65412
Add @type annotations when serializing SocialActivity data
calumcalder Nov 14, 2024
99cffe3
Implement basic GenericImporter for SOCIAL_POSTS
calumcalder Nov 14, 2024
80c55cb
Implement basic GenericFileImporter for BLOBS
calumcalder Nov 14, 2024
312862b
Add README with draft docs for generic importers
calumcalder Nov 15, 2024
40e18b1
Configure GenericFileImporter for MEDIA
calumcalder Nov 15, 2024
161bf7e
Configure GenericImporter for CALENDAR
calumcalder Nov 15, 2024
cdaf814
Extract generic BLOBS serializer to separate module
calumcalder Nov 27, 2024
c78c125
Extract generic SOCIAL_POSTS exporter to separate module
calumcalder Nov 27, 2024
c5bf7de
Extract generic CALENDAR exporter to separate module
calumcalder Nov 27, 2024
f2592db
Extract generic MEDIA exporter to separate module
calumcalder Nov 27, 2024
5eede87
Move CachedDownloadableItem to BlobbySerializer
calumcalder Nov 27, 2024
21c0682
Serialize dates to strings in tests
calumcalder Nov 27, 2024
46289c4
Preserve typing information of JSON payloads
calumcalder Nov 27, 2024
07bf8f7
Use custom schema for media GenericImporter serialization
calumcalder Nov 27, 2024
23bec4a
Tidy up TypeNames of top-level schemas
calumcalder Nov 28, 2024
d6d75e2
Configure endpoint and supported verticals for generic importers
calumcalder Nov 28, 2024
18c3d47
Support OAuth Authorization Code-based auth for generic importers
calumcalder Dec 5, 2024
b276dbf
Tidy up MediaSerializer
calumcalder Dec 5, 2024
51c8539
Tidy up BlobbySerializer
calumcalder Dec 5, 2024
dec93ba
Add tests for GenericImporter
calumcalder Dec 6, 2024
8cecc29
Simplify GenericFileImporter
calumcalder Dec 6, 2024
fe16106
Set mimetype in GenericFileImporter
calumcalder Dec 6, 2024
dd918e6
Tidy up old test code
calumcalder Dec 6, 2024
47878b5
Flatten BlobbySerializer File data
calumcalder Dec 6, 2024
76c37d7
Add schemas to README docs
calumcalder Dec 6, 2024
30fd368
Correct OAuth doc
calumcalder Dec 6, 2024
02f0668
Add configuration docs
calumcalder Dec 6, 2024
e0cfc5f
Remove test generic extension config file
calumcalder Dec 6, 2024
4aba1f1
Document GenericPayload wrapper
calumcalder Dec 6, 2024
e5ae96f
Rename ContainerMapper to ContainerSerializer
calumcalder Dec 11, 2024
0d4bee3
Expand documentation
calumcalder Dec 13, 2024
87184d2
Fix 'favourite' typo
calumcalder Dec 13, 2024
eb6435d
Remove extra import
calumcalder Dec 13, 2024
581e23e
Use set for vertical config storage
calumcalder Dec 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
736 changes: 736 additions & 0 deletions extensions/data-transfer/portability-data-transfer-generic/README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright 2018 The Data Transfer Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Build configuration for the generic-importer transfer extension.
// Uses the legacy `maven`/`compile`/`testCompile` configurations to stay
// consistent with the other extension modules in this repository.
plugins {
id 'maven'
id 'signing'
id 'application'
}

dependencies {
// SPI modules this extension implements against.
compile project(':portability-spi-transfer')
compile project(':portability-spi-cloud')
compile project(':portability-types-common')
// HTTP client used to POST serialized payloads to the generic endpoint.
compile "com.squareup.okhttp3:okhttp:${okHttpVersion}"
// Jackson support for java.time (JSR-310) and java.util.Optional (JDK 8) types.
compile "com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${jacksonVersion}"
compile "com.fasterxml.jackson.datatype:jackson-datatype-jdk8:${jacksonVersion}"
// Test-only: in-process HTTP server and multipart parsing for request assertions.
testCompile "com.squareup.okhttp3:mockwebserver:${okHttpVersion}"
testCompile "commons-fileupload:commons-fileupload:1.5"
}

// Shared helper (defined in the root build) that wires up publication/signing.
configurePublication(project)
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
package org.datatransferproject.datatransfer.generic;

import static java.lang.String.format;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.annotation.JsonTypeName;
import java.time.ZonedDateTime;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Queue;
import org.datatransferproject.types.common.DownloadableItem;
import org.datatransferproject.types.common.models.blob.BlobbyStorageContainerResource;
import org.datatransferproject.types.common.models.blob.DigitalDocumentWrapper;
import org.datatransferproject.types.common.models.blob.DtpDigitalDocument;

/**
 * Wrapper to adapt items known to be in temp storage (e.g. BLOB data) into {@link DownloadableItem}
 *
 * <p>It's useful to wrap such items so upstream code can consume either known temp store'd items or
 * items the Importer has to download itself (some MEDIA items) from the same interface.
 */
class CachedDownloadableItem implements DownloadableItem {
  // Immutable after construction; declared final to make that explicit.
  private final String cachedId;
  private final String name;

  public CachedDownloadableItem(String cachedId, String name) {
    this.cachedId = cachedId;
    this.name = name;
  }

  /** The temp-store content ID doubles as the idempotent ID. */
  @Override
  public String getIdempotentId() {
    return cachedId;
  }

  @Override
  public String getFetchableUrl() {
    // 'url' is ID when cached
    return cachedId;
  }

  /** Always true by construction — this wrapper only represents temp-store'd items. */
  @Override
  public boolean isInTempStore() {
    return true;
  }

  @Override
  public String getName() {
    return name;
  }
}

/**
 * Serialized representation of a single file within a Blobby tree.
 *
 * <p>Tagged with {@code @type: "File"} so importers can distinguish files from folders in the
 * generic payload stream.
 */
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME)
@JsonTypeName("File")
class FileExportData implements BlobbySerializer.ExportData {
  @JsonProperty private final String folder;
  @JsonProperty private final String name;
  @JsonProperty private final Optional<ZonedDateTime> dateModified;

  private FileExportData(String folder, String name, Optional<ZonedDateTime> dateModified) {
    this.folder = folder;
    this.name = name;
    this.dateModified = dateModified;
  }

  public String getFolder() {
    return folder;
  }

  public String getName() {
    return name;
  }

  public Optional<ZonedDateTime> getDateModified() {
    return dateModified;
  }

  /**
   * Builds export data for a file at {@code path} from the given document model.
   *
   * <p>An absent or empty {@code dateModified} string yields {@code Optional.empty()}; otherwise
   * the string must be ISO-8601 zoned-date-time parseable or {@link ZonedDateTime#parse} throws.
   */
  public static FileExportData fromDtpDigitalDocument(String path, DtpDigitalDocument model) {
    return new FileExportData(
        path,
        model.getName(),
        Optional.ofNullable(model.getDateModified())
            .filter(dateString -> !dateString.isEmpty())
            // Parse the already-filtered value; the original re-invoked
            // model.getDateModified() inside map(), bypassing the lambda argument.
            .map(ZonedDateTime::parse));
  }
}

/**
 * Serialized representation of a folder within a Blobby tree.
 *
 * <p>Tagged with {@code @type: "Folder"} so importers can distinguish folders from files.
 */
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME)
@JsonTypeName("Folder")
class FolderExportData implements BlobbySerializer.ExportData {
  @JsonProperty private final String path;

  @JsonCreator
  // Creator parameters need an explicit property name: without one Jackson cannot bind
  // the JSON field to this argument unless the parameter-names module is registered.
  public FolderExportData(@JsonProperty("path") String path) {
    this.path = path;
  }

  public String getPath() {
    return path;
  }
}

public class BlobbySerializer {
  @JsonSubTypes({
    @JsonSubTypes.Type(FolderExportData.class),
    @JsonSubTypes.Type(FileExportData.class),
  })
  public interface ExportData {}

  /** Pairs a container with its parent folder's path during the tree walk. */
  static class BlobbyContainerPath {
    private BlobbyStorageContainerResource container;
    private String path;

    public BlobbyContainerPath(BlobbyStorageContainerResource container, String path) {
      this.container = container;
      this.path = path;
    }

    public BlobbyStorageContainerResource getContainer() {
      return container;
    }

    public String getPath() {
      return path;
    }
  }

  static final String SCHEMA_SOURCE =
      GenericTransferConstants.SCHEMA_SOURCE_BASE
          + "/extensions/data-transfer/portability-data-transfer-generic/src/main/java/org/datatransferproject/datatransfer/generic/BlobbySerializer.java";

  /**
   * Flattens a Blobby folder tree into an ordered list of importable items.
   *
   * <p>Traversal is breadth-first; each folder is emitted before the files it contains so an
   * importer can create the folder before uploading into it.
   */
  public static Iterable<ImportableData<ExportData>> serialize(
      BlobbyStorageContainerResource root) {
    List<ImportableData<ExportData>> results = new ArrayList<>();
    // Breadth-first walk: seed with the root, then drain the queue.
    Queue<BlobbyContainerPath> pending = new ArrayDeque<>();
    pending.add(new BlobbyContainerPath(root, ""));
    while (!pending.isEmpty()) {
      BlobbyContainerPath entry = pending.remove();
      BlobbyStorageContainerResource folder = entry.getContainer();
      String folderPath = format("%s/%s", entry.getPath(), folder.getName());

      // Emit the folder itself before anything inside it.
      results.add(
          new ImportableData<>(
              new GenericPayload<>(new FolderExportData(folderPath), SCHEMA_SOURCE),
              folder.getId(),
              folderPath));

      // Queue sub-folders for later visits.
      for (BlobbyStorageContainerResource subFolder : folder.getFolders()) {
        pending.add(new BlobbyContainerPath(subFolder, folderPath));
      }

      // Emit the folder's files; intentionally after the folder entry above.
      for (DigitalDocumentWrapper file : folder.getFiles()) {
        DtpDigitalDocument document = file.getDtpDigitalDocument();
        results.add(
            new ImportableFileData<>(
                new CachedDownloadableItem(file.getCachedContentId(), document.getName()),
                document.getEncodingFormat(),
                new GenericPayload<>(
                    FileExportData.fromDtpDigitalDocument(folderPath, document), SCHEMA_SOURCE),
                file.getCachedContentId(),
                document.getName()));
      }
    }

    return results;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package org.datatransferproject.datatransfer.generic;

import com.fasterxml.jackson.annotation.JsonSubTypes;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.datatransferproject.types.common.models.calendar.CalendarAttendeeModel;
import org.datatransferproject.types.common.models.calendar.CalendarContainerResource;
import org.datatransferproject.types.common.models.calendar.CalendarEventModel;
import org.datatransferproject.types.common.models.calendar.CalendarModel;
import org.datatransferproject.types.common.models.calendar.RecurrenceRule;

/** Export-side wrapper that tags a {@link CalendarModel} as generic-importer payload data. */
class CalendarExportData extends CalendarModel implements CalendarSerializer.ExportData {
  private CalendarExportData(String id, String name, String description) {
    super(id, name, description);
  }

  /** Copies the fields of an exported calendar into the serializable wrapper type. */
  public static CalendarExportData fromModel(CalendarModel model) {
    String id = model.getId();
    String name = model.getName();
    String description = model.getDescription();
    return new CalendarExportData(id, name, description);
  }
}

/** Export-side wrapper that tags a {@link CalendarEventModel} as generic-importer payload data. */
class CalendarEventExportData extends CalendarEventModel implements CalendarSerializer.ExportData {

  private CalendarEventExportData(
      String calendarId,
      String title,
      String notes,
      List<CalendarAttendeeModel> attendees,
      String location,
      CalendarEventTime startTime,
      CalendarEventTime endTime,
      RecurrenceRule recurrenceRule) {
    super(calendarId, title, notes, attendees, location, startTime, endTime, recurrenceRule);
  }

  /** Copies the fields of an exported event into the serializable wrapper type. */
  public static CalendarEventExportData fromModel(CalendarEventModel model) {
    String calendarId = model.getCalendarId();
    String title = model.getTitle();
    String notes = model.getNotes();
    List<CalendarAttendeeModel> attendees = model.getAttendees();
    String location = model.getLocation();
    CalendarEventTime startTime = model.getStartTime();
    CalendarEventTime endTime = model.getEndTime();
    RecurrenceRule recurrenceRule = model.getRecurrenceRule();
    return new CalendarEventExportData(
        calendarId, title, notes, attendees, location, startTime, endTime, recurrenceRule);
  }
}

public class CalendarSerializer {

  @JsonSubTypes({
    @JsonSubTypes.Type(value = CalendarExportData.class, name = "Calendar"),
    @JsonSubTypes.Type(value = CalendarEventExportData.class, name = "CalendarEvent"),
  })
  public interface ExportData {}

  static final String SCHEMA_SOURCE_CALENDAR =
      GenericTransferConstants.SCHEMA_SOURCE_BASE
          + "/portability-types-common/src/main/java/org/datatransferproject/types/common/models/calendar/CalendarModel.java";
  static final String SCHEMA_SOURCE_EVENT =
      GenericTransferConstants.SCHEMA_SOURCE_BASE
          + "/portability-types-common/src/main/java/org/datatransferproject/types/common/models/calendar/CalendarEventModel.java";

  /**
   * Converts a calendar container into importable items: all calendars first, then all events.
   *
   * <p>Calendars are keyed by their ID; events have no stable ID in the model, so their hash code
   * is used as the idempotent ID.
   */
  public static Iterable<ImportableData<ExportData>> serialize(
      CalendarContainerResource container) {
    Stream<ImportableData<ExportData>> calendarItems =
        container.getCalendars().stream()
            .map(
                calendar ->
                    new ImportableData<>(
                        new GenericPayload<ExportData>(
                            CalendarExportData.fromModel(calendar), SCHEMA_SOURCE_CALENDAR),
                        calendar.getId(),
                        calendar.getName()));
    Stream<ImportableData<ExportData>> eventItems =
        container.getEvents().stream()
            .map(
                event ->
                    new ImportableData<>(
                        new GenericPayload<ExportData>(
                            CalendarEventExportData.fromModel(event), SCHEMA_SOURCE_EVENT),
                        String.valueOf(event.hashCode()),
                        event.getTitle()));
    return Stream.concat(calendarItems, eventItems).collect(Collectors.toList());
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package org.datatransferproject.datatransfer.generic;

import static java.lang.String.format;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.Optional;
import java.util.UUID;
import okhttp3.MediaType;
import okhttp3.MultipartBody;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
import org.datatransferproject.api.launcher.Monitor;
import org.datatransferproject.spi.cloud.connection.ConnectionProvider;
import org.datatransferproject.spi.cloud.storage.TemporaryPerJobDataStore;
import org.datatransferproject.spi.cloud.storage.TemporaryPerJobDataStore.InputStreamWrapper;
import org.datatransferproject.spi.transfer.types.InvalidTokenException;
import org.datatransferproject.types.common.models.ContainerResource;
import org.datatransferproject.types.transfer.auth.AppCredentials;
import org.datatransferproject.types.transfer.auth.AuthData;
import org.datatransferproject.types.transfer.auth.TokensAndUrlAuthData;

/**
 * Importer for verticals whose items carry file payloads (e.g. BLOBS, MEDIA).
 *
 * <p>Extends {@link GenericImporter} by sending {@link ImportableFileData} items as
 * multipart/related requests: one JSON metadata part plus one binary file part. Non-file items
 * fall through to the parent's JSON-only path.
 */
public class GenericFileImporter<C extends ContainerResource, R> extends GenericImporter<C, R> {
  private final TemporaryPerJobDataStore dataStore;
  private final ConnectionProvider connectionProvider;

  static final MediaType MULTIPART_RELATED = MediaType.parse("multipart/related");
  // Fallback when an item's mime type is missing or unparseable.
  static final MediaType OCTET_STREAM = MediaType.parse("application/octet-stream");

  public GenericFileImporter(
      ContainerSerializer<C, R> containerSerializer,
      AppCredentials appCredentials,
      URL endpoint,
      TemporaryPerJobDataStore dataStore,
      Monitor monitor) {
    super(containerSerializer, appCredentials, endpoint, monitor);
    this.dataStore = dataStore;
    this.connectionProvider = new ConnectionProvider(dataStore);
  }

  @Override
  public boolean importSingleItem(
      UUID jobId, TokensAndUrlAuthData authData, ImportableData<R> dataItem)
      throws IOException, InvalidTokenException {
    if (dataItem instanceof ImportableFileData) {
      return importSingleFileItem(jobId, authData, (ImportableFileData<R>) dataItem);
    } else {
      return super.importSingleItem(jobId, authData, dataItem);
    }
  }

  /**
   * Uploads a single file item as a multipart/related POST.
   *
   * <p>Stages the file into a temp file first (the input stream may come from remote storage),
   * and always deletes the temp file afterwards, even if the request fails.
   */
  // NOTE: the unused <T> type parameter from the original signature has been removed.
  private boolean importSingleFileItem(UUID jobId, AuthData authData, ImportableFileData<R> data)
      throws IOException, InvalidTokenException {
    InputStreamWrapper wrapper = connectionProvider.getInputStreamForItem(jobId, data.getFile());
    File tempFile =
        dataStore.getTempFileFromInputStream(wrapper.getStream(), data.getFile().getName(), null);
    MediaType mimeType =
        Optional.ofNullable(MediaType.parse(data.getFileMimeType())).orElse(OCTET_STREAM);
    Request request =
        new Request.Builder()
            .url(endpoint)
            .addHeader("Authorization", format("Bearer %s", authData.getToken()))
            .post(
                new MultipartBody.Builder()
                    .setType(MULTIPART_RELATED)
                    .addPart(RequestBody.create(JSON, om.writeValueAsBytes(data.getJsonData())))
                    .addPart(MultipartBody.create(mimeType, tempFile))
                    .build())
            .build();

    try (Response response = client.newCall(request).execute()) {
      return parseResponse(response);
    } finally {
      // Best-effort cleanup; a failed delete is harmless (temp store is per-job).
      tempFile.delete();
    }
  }
}
Loading
Loading