diff --git a/docs/description.md b/docs/description.md index 195366e..a2cfa5e 100644 --- a/docs/description.md +++ b/docs/description.md @@ -39,12 +39,41 @@ closely follow the JSON that is passed to the Dataverse API. | File | Description | |------------------------|------------------------------------------------------------------------------------------------------------------------------------------| +| `init.yml` | Preconditions and instructions for creating a new dataset. | | `dataset.yml` | Dataset level metadata. | | `edit-files.yml` | Instructions for deleting, replacing or moving files, or updating the file metadata;
also included: restricting and embargoing files | | `edit-metadata.yml` | Edit dataset level metadata, including metadata value deletions | | `edit-permissions.yml` | Role assignments to create or delete on the dataset | | `update-state.yml` | Whether to publish the dataset version or submit it for review | +##### init.yml + +The init file initializes the ingest process. It can be used to verify that an expected precondition is met: + +```yaml +init: + expect: + state: 'released' # or 'draft', 'absent'. +``` + +If the state of the dataset does not match the expected state, the ingest procedure will be aborted. The state can be one of `released`, `draft` or `absent` +(meaning that the dataset should not exist). By default, no check will be performed. + +It can also be used to instruct the service to import the bag as a dataset with an existing DOI: + +```yaml +init: + create: + importPid: 'doi:10.5072/FK2/ABCDEF' +``` + +In this case the `updates-dataset` property in `deposit.properties` should not be set. It will be ignored if it is. By default, a new dataset will be created, +whose persistent identifier will be assigned by Dataverse. + +The user is responsible for providing expectations and instructions that do not conflict with each other. For example, if the `importPid` property is set, and +the `state` property is set to `released`, the service will either abort because the dataset does not exist in the `released` state, or it will fail to import the dataset, because +the dataset already exists. + ##### dataset.yml The format is the same as the JSON that is passed to the [createDataset]{:target=_blank} endpoint of the Dataverse API. Note that the `files` field is not used. 
@@ -83,7 +112,7 @@ editFiles: - 'file4.txt' - 'subdirectory/file5.txt' addUnrestrictedFiles: - - 'file6.txt' + - 'file6.txt' moveFiles: - from: 'file6.txt' # Old location in the dataset to: 'subdirectory/file6.txt' # New location in the dataset @@ -107,7 +136,7 @@ editFiles: The actions specified in this file correspond roughly to the actions available in the dropdown menu in the file view of a dataset in Dataverse. -The replacement file is looked up in the bag, under the `data` directory under the same path as the original file has in the dataset. Note that files in +The replacement file is looked up in the bag, under the `data` directory under the same path as the original file has in the dataset. Note that files in `replaceFiles` will automatically be skipped in the add files step, the deleted files, however, will not. In other words, it is also possible to remove a file and add a file back to the same location in one deposit. In that case, there will be no continuous history of the file in the dataset. @@ -183,14 +212,16 @@ Allows you to selectively delete or add role assignments on the dataset. The for ##### update-state.yml ```yaml -action: 'submit-for-review' -# One of the following actions: -# - 'leave-draft' (default) -# - 'publish-major-version' -# - 'publish-minor-version' -# - 'submit-for-review' +updateState: + publish: major # or 'minor' ``` +```yaml +updateState: + releaseMigrated: 2021-01-01 +``` + + #### New versions of existing datasets A deposit can also be used to create a new version of an existing dataset. 
In this case, the `deposit.properties` file must contain the following property: diff --git a/src/main/java/nl/knaw/dans/dvingest/DdDataverseIngestApplication.java b/src/main/java/nl/knaw/dans/dvingest/DdDataverseIngestApplication.java index 4e32127..2c9ec76 100644 --- a/src/main/java/nl/knaw/dans/dvingest/DdDataverseIngestApplication.java +++ b/src/main/java/nl/knaw/dans/dvingest/DdDataverseIngestApplication.java @@ -91,8 +91,8 @@ public void run(final DdDataverseIngestConfiguration configuration, final Enviro DansDepositSupportFactory dansDepositSupportFactoryImport = new DansDepositSupportDisabledFactory(); if (dansDepositConversionConfig != null) { var dansBagMappingServiceImport = createDansBagMappingService(false, dansDepositConversionConfig, dataverseService); - var validateDansBagImportImport = new ValidateDansBagServiceImpl(dansDepositConversionConfig.getValidateDansBag(), false); - dansDepositSupportFactoryImport = new DansDepositSupportFactoryImpl(validateDansBagImportImport, dansBagMappingServiceImport, dataverseService, yamlService); + var validateDansBagImport = new ValidateDansBagServiceImpl(dansDepositConversionConfig.getValidateDansBag(), false); + dansDepositSupportFactoryImport = new DansDepositSupportFactoryImpl(validateDansBagImport, dansBagMappingServiceImport, dataverseService, yamlService); } var depositTaskFactoryImport = new DepositTaskFactoryImpl(bagProcessorFactory, dansDepositSupportFactoryImport); var importJobFactory = new ImportJobFactoryImpl(dataverseIngestDepositFactory, depositTaskFactoryImport); @@ -105,9 +105,9 @@ public void run(final DdDataverseIngestConfiguration configuration, final Enviro */ DansDepositSupportFactory dansDepositSupportFactoryMigration = new DansDepositSupportDisabledFactory(); if (dansDepositConversionConfig != null) { - var dansBagMappingService = createDansBagMappingService(true, dansDepositConversionConfig, dataverseService); - var validateDansBagImport = new 
ValidateDansBagServiceImpl(dansDepositConversionConfig.getValidateDansBag(), true); - dansDepositSupportFactoryMigration = new DansDepositSupportFactoryImpl(validateDansBagImport, dansBagMappingService, dataverseService, yamlService); + var dansBagMappingServiceMigration = createDansBagMappingService(true, dansDepositConversionConfig, dataverseService); + var validateDansBagMigration = new ValidateDansBagServiceImpl(dansDepositConversionConfig.getValidateDansBag(), true); + dansDepositSupportFactoryMigration = new DansDepositSupportFactoryImpl(validateDansBagMigration, dansBagMappingServiceMigration, dataverseService, yamlService); } var depositTaskFactoryMigration = new DepositTaskFactoryImpl(bagProcessorFactory, dansDepositSupportFactoryMigration); var migrationJobFactory = new ImportJobFactoryImpl(dataverseIngestDepositFactory, depositTaskFactoryMigration); diff --git a/src/main/java/nl/knaw/dans/dvingest/core/DataverseIngestBag.java b/src/main/java/nl/knaw/dans/dvingest/core/DataverseIngestBag.java index 463a8a4..1a302e9 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/DataverseIngestBag.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/DataverseIngestBag.java @@ -24,7 +24,11 @@ import nl.knaw.dans.dvingest.core.yaml.EditMetadataRoot; import nl.knaw.dans.dvingest.core.yaml.EditPermissions; import nl.knaw.dans.dvingest.core.yaml.EditPermissionsRoot; +import nl.knaw.dans.dvingest.core.yaml.Init; +import nl.knaw.dans.dvingest.core.yaml.InitRoot; +import nl.knaw.dans.dvingest.core.yaml.UpdateAction; import nl.knaw.dans.dvingest.core.yaml.UpdateState; +import nl.knaw.dans.dvingest.core.yaml.UpdateStateRoot; import nl.knaw.dans.lib.dataverse.model.dataset.Dataset; import java.io.IOException; @@ -35,6 +39,7 @@ public class DataverseIngestBag implements Comparable { private final YamlServiceImpl yamService; + public static final String INIT_YML = "init.yml"; public static final String DATASET_YML = "dataset.yml"; public static final String EDIT_FILES_YML = 
"edit-files.yml"; public static final String EDIT_METADATA_YML = "edit-metadata.yml"; @@ -56,6 +61,14 @@ public boolean looksLikeDansBag() { return Files.exists(bagDir.resolve("metadata/dataset.xml")); } + public Init getInit() throws IOException, ConfigurationException { + if (!Files.exists(bagDir.resolve(INIT_YML))) { + return null; + } + var initRoot = yamService.readYaml(bagDir.resolve(INIT_YML), InitRoot.class); + return initRoot.getInit(); + } + public Dataset getDatasetMetadata() throws IOException, ConfigurationException { if (!Files.exists(bagDir.resolve(DATASET_YML))) { return null; @@ -89,11 +102,12 @@ public EditPermissions getEditPermissions() throws IOException, ConfigurationExc return editPermissionsRoot.getEditPermissions(); } - public UpdateState getUpdateState() throws IOException, ConfigurationException { + public UpdateAction getUpdateState() throws IOException, ConfigurationException { if (!Files.exists(bagDir.resolve(UPDATE_STATE_YML))) { return null; } - return yamService.readYaml(bagDir.resolve(UPDATE_STATE_YML), UpdateState.class); + var updateStateRoot = yamService.readYaml(bagDir.resolve(UPDATE_STATE_YML), UpdateStateRoot.class); + return updateStateRoot.getUpdateState(); } @Override diff --git a/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/BagProcessor.java b/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/BagProcessor.java index 0eb51cb..ef6a12d 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/BagProcessor.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/BagProcessor.java @@ -21,6 +21,8 @@ import nl.knaw.dans.dvingest.core.DataverseIngestBag; import nl.knaw.dans.dvingest.core.service.DataverseService; import nl.knaw.dans.dvingest.core.service.UtilityServices; +import nl.knaw.dans.dvingest.core.yaml.UpdateState; +import nl.knaw.dans.dvingest.core.yaml.UpdateStateRoot; import nl.knaw.dans.lib.dataverse.DataverseException; import java.io.IOException; @@ -39,7 +41,7 @@ public class 
BagProcessor { @Builder private BagProcessor(UUID depositId, DataverseIngestBag bag, DataverseService dataverseService, UtilityServices utilityServices) throws IOException, ConfigurationException { - this.datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseService, bag.getDatasetMetadata()); + this.datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseService, bag.getInit(), bag.getDatasetMetadata()); this.filesEditor = new FilesEditor(depositId, bag.getDataDir(), bag.getEditFiles(), dataverseService, utilityServices); this.metadataEditor = new MetadataEditor(depositId, bag.getEditMetadata(), dataverseService); this.permissionsEditor = new PermissionsEditor(depositId, bag.getEditPermissions(), dataverseService); diff --git a/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/DatasetVersionCreator.java b/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/DatasetVersionCreator.java index e38e164..e5fff8c 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/DatasetVersionCreator.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/DatasetVersionCreator.java @@ -19,6 +19,8 @@ import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import nl.knaw.dans.dvingest.core.service.DataverseService; +import nl.knaw.dans.dvingest.core.yaml.Expect; +import nl.knaw.dans.dvingest.core.yaml.Init; import nl.knaw.dans.lib.dataverse.DataverseException; import nl.knaw.dans.lib.dataverse.model.dataset.Dataset; @@ -36,15 +38,27 @@ public class DatasetVersionCreator { @NonNull private final DataverseService dataverseService; + private final Init init; + private final Dataset dataset; public String createDatasetVersion(String targetPid) throws IOException, DataverseException { + if (init != null && init.getExpect() != null) { + checkExpectations(init.getExpect(), targetPid); + } + var pid = targetPid; if (targetPid == null) { if (dataset == null) { throw new IllegalArgumentException("Must have dataset metadata to create a new 
dataset."); } - pid = createDataset(); + if (init != null && init.getCreate() != null && init.getCreate().getImportPid() != null) { + importDataset(init.getCreate().getImportPid()); + pid = init.getCreate().getImportPid(); + } + else { + pid = createDataset(); + } } // Even if we just created the dataset, we still need to update the metadata, because Dataverse ignores some things // in the create request. @@ -54,6 +68,46 @@ public String createDatasetVersion(String targetPid) throws IOException, Dataver return pid; } + private void checkExpectations(Expect expect, String targetPid) throws DataverseException, IOException { + if (targetPid == null) { + return; + } + + if (expect != null && expect.getState() != null) { + switch (expect.getState()) { + case draft: + case released: + var state = dataverseService.getDatasetState(targetPid); + if (expect.getState().name().equals(state.toLowerCase())) { + log.debug("Expected state {} found for dataset {}", expect.getState(), targetPid); + } + else { + throw new IllegalStateException("Expected state " + expect.getState() + " but found " + state + " for dataset " + targetPid); + } + break; + case absent: + try { + dataverseService.getDatasetState(targetPid); + throw new IllegalStateException("Expected state absent but found for dataset " + targetPid); + } + catch (DataverseException e) { + if (e.getMessage().contains("404")) { + log.debug("Expected state absent found for dataset {}", targetPid); + } + else { + throw e; + } + } + } + } + } + + private void importDataset(String pid) throws IOException, DataverseException { + log.debug("Start importing dataset for deposit {}", depositId); + dataverseService.importDataset(pid, dataset); + log.debug("End importing dataset for deposit {}", depositId); + } + private String createDataset() throws IOException, DataverseException { log.debug("Start creating dataset for deposit {}", depositId); var pid = dataverseService.createDataset(dataset); diff --git 
a/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/FilesEditor.java b/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/FilesEditor.java index e693839..21f624b 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/FilesEditor.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/FilesEditor.java @@ -122,16 +122,34 @@ private void deleteFiles() throws IOException, DataverseException { log.debug("End deleting files for deposit {}", depositId); } - private void replaceFiles() throws IOException, DataverseException { + private void replaceFiles() throws IOException { log.debug("Start replacing {} files for deposit {}", depositId, editFiles.getReplaceFiles().size()); for (var filepath : editFiles.getReplaceFiles()) { log.debug("Replacing file: {}", filepath); var fileMeta = filesInDatasetCache.get(filepath); - dataverseService.replaceFile(pid, fileMeta, dataDir.resolve(filepath)); + utilityServices.wrapIfZipFile(dataDir.resolve(filepath)).ifPresentOrElse( + zipFile -> { + replaceFileOrThrow(pid, fileMeta, zipFile); + FileUtils.deleteQuietly(zipFile.toFile()); + }, + () -> { + var fileToUpload = dataDir.resolve(filepath); + replaceFileOrThrow(pid, fileMeta, fileToUpload); + } + ); } log.debug("End replacing files for deposit {}", depositId); } + private void replaceFileOrThrow(String pid, FileMeta fileMeta, Path fileToUpload) { + try { + dataverseService.replaceFile(pid, fileMeta, fileToUpload); + } + catch (IOException | DataverseException e) { + throw new RuntimeException(e); + } + } + private void addRestrictedFiles() throws IOException, DataverseException { log.debug("Start adding {} restricted files for deposit {}", editFiles.getAddRestrictedFiles().size(), depositId); var iterator = new PathIterator(getRestrictedFilesToUpload()); diff --git a/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/StateUpdater.java b/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/StateUpdater.java index d6156ca..2e31465 100644 --- 
a/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/StateUpdater.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/StateUpdater.java @@ -18,7 +18,9 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import nl.knaw.dans.dvingest.core.service.DataverseService; -import nl.knaw.dans.dvingest.core.yaml.UpdateState; +import nl.knaw.dans.dvingest.core.yaml.PublishAction; +import nl.knaw.dans.dvingest.core.yaml.ReleaseMigratedAction; +import nl.knaw.dans.dvingest.core.yaml.UpdateAction; import nl.knaw.dans.lib.dataverse.DataverseException; import nl.knaw.dans.lib.dataverse.model.dataset.UpdateType; @@ -29,29 +31,18 @@ @RequiredArgsConstructor public class StateUpdater { private final UUID depositId; - private final UpdateState updateState; + private final UpdateAction updateAction; private final DataverseService dataverseService; private String pid; public void updateState(String pid) throws DataverseException, IOException { this.pid = pid; - if (updateState == null) { - log.debug("No update state found. 
Skipping update state processing."); - return; + if (updateAction instanceof PublishAction) { + publishVersion(((PublishAction) updateAction).getUpdateType()); } - if ("publish-major".equals(updateState.getAction())) { - publishVersion(UpdateType.major); - } - else if ("publish-minor".equals(updateState.getAction())) { - publishVersion(UpdateType.minor); - } - else if ("submit-for-review".equals(updateState.getAction())) { - // TODO: Implement submit for review - throw new UnsupportedOperationException("Submit for review not yet implemented"); - } - else { - throw new IllegalArgumentException("Unknown update state action: " + updateState.getAction()); + else if (updateAction instanceof ReleaseMigratedAction) { + releaseMigrated(((ReleaseMigratedAction) updateAction).getReleaseDate()); } } @@ -62,4 +53,10 @@ private void publishVersion(UpdateType updateType) throws DataverseException, IO log.debug("End publishing version for deposit {}", depositId); } + public void releaseMigrated(String date) throws DataverseException, IOException { + log.debug("Start releasing migrated version for deposit {}", depositId); + dataverseService.releaseMigratedDataset(pid, date); + dataverseService.waitForState(pid, "RELEASED"); + log.debug("End releasing migrated version for deposit {}", depositId); + } } diff --git a/src/main/java/nl/knaw/dans/dvingest/core/dansbag/DansBagMappingService.java b/src/main/java/nl/knaw/dans/dvingest/core/dansbag/DansBagMappingService.java index bf274a4..64f3ea7 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/dansbag/DansBagMappingService.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/dansbag/DansBagMappingService.java @@ -19,6 +19,8 @@ import nl.knaw.dans.dvingest.core.dansbag.exception.InvalidDepositException; import nl.knaw.dans.dvingest.core.yaml.EditFiles; import nl.knaw.dans.dvingest.core.yaml.EditPermissions; +import nl.knaw.dans.dvingest.core.yaml.Init; +import nl.knaw.dans.dvingest.core.yaml.UpdateAction; import 
nl.knaw.dans.lib.dataverse.DataverseException; import nl.knaw.dans.lib.dataverse.model.dataset.Dataset; import nl.knaw.dans.lib.dataverse.model.dataset.DatasetVersion; @@ -49,6 +51,15 @@ public interface DansBagMappingService { */ DansBagDeposit readDansDeposit(Path depositDir) throws InvalidDepositException; + /** + * Determines what preconditions to expect and whether and how to create a new dataset based on the DANS deposit. + * + * @param dansDeposit the DANS deposit + * @param isUpdate whether the deposit updates an existing dataset (true) or creates a new dataset (false) + * @return the preconditions to expect and whether and how to create a new dataset, or null if there is nothing to check or instruct + */ + Init getInitFromDansDeposit(DansBagDeposit dansDeposit, boolean isUpdate); + /** * Maps the metadata from the DANS deposit to the new dataset level metadata for the dataset. For some parts the new metadata depends on the current metadata of the dataset. That is why the * current metadata is also given as input. If the deposit is to create a new dataset, the current metadata is null. @@ -62,8 +73,8 @@ public interface DansBagMappingService { /** * Maps file information in the DANS bag to edit actions for the files in the dataset. The edit actions are used to update the files in the dataset. * - * @param dansDeposit the DANS deposit - * @param updatesDataset the DOI of the dataset that needs to be updated, or null if the deposit is to create a new dataset + * @param dansDeposit the DANS deposit + * @param updatesDataset the DOI of the dataset that needs to be updated, or null if the deposit is to create a new dataset * @return the edit actions for the files in the dataset */ EditFiles getEditFilesFromDansDeposit(DansBagDeposit dansDeposit, String updatesDataset); @@ -78,11 +89,10 @@ public interface DansBagMappingService { EditPermissions getEditPermissionsFromDansDeposit(DansBagDeposit dansDeposit, String updatesDataset); /** - * Packages the original metadata of the DANS bag into a ZIP file and returns the local path to the ZIP file. 
+ * Maps the DANS deposit to an update action for the dataset. This determines how to publish the dataset (as migrated or as new). * * @param dansDeposit the DANS deposit - * @return the local path to the ZIP file - * @throws IOException if there was an error reading the deposit or writing the ZIP file + * @return the update action for the dataset */ - String packageOriginalMetadata(DansBagDeposit dansDeposit) throws IOException; + UpdateAction getUpdateActionFromDansDeposit(DansBagDeposit dansDeposit); } diff --git a/src/main/java/nl/knaw/dans/dvingest/core/dansbag/DansBagMappingServiceImpl.java b/src/main/java/nl/knaw/dans/dvingest/core/dansbag/DansBagMappingServiceImpl.java index 5d9cb17..67dfb37 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/dansbag/DansBagMappingServiceImpl.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/dansbag/DansBagMappingServiceImpl.java @@ -23,24 +23,38 @@ import nl.knaw.dans.dvingest.core.dansbag.deposit.FileInfo; import nl.knaw.dans.dvingest.core.dansbag.exception.InvalidDepositException; import nl.knaw.dans.dvingest.core.dansbag.mapper.DepositToDvDatasetMetadataMapper; +import nl.knaw.dans.dvingest.core.dansbag.mapper.mapping.Amd; import nl.knaw.dans.dvingest.core.dansbag.mapper.mapping.FileElement; import nl.knaw.dans.dvingest.core.dansbag.xml.XPathEvaluator; import nl.knaw.dans.dvingest.core.dansbag.xml.XmlReader; import nl.knaw.dans.dvingest.core.dansbag.xml.XmlReaderImpl; import nl.knaw.dans.dvingest.core.service.DataverseService; +import nl.knaw.dans.dvingest.core.yaml.Create; import nl.knaw.dans.dvingest.core.yaml.EditFiles; import nl.knaw.dans.dvingest.core.yaml.EditPermissions; +import nl.knaw.dans.dvingest.core.yaml.Expect; +import nl.knaw.dans.dvingest.core.yaml.Expect.State; +import nl.knaw.dans.dvingest.core.yaml.Init; +import nl.knaw.dans.dvingest.core.yaml.PublishAction; +import nl.knaw.dans.dvingest.core.yaml.ReleaseMigratedAction; +import nl.knaw.dans.dvingest.core.yaml.UpdateAction; import 
nl.knaw.dans.lib.dataverse.DataverseException; import nl.knaw.dans.lib.dataverse.model.RoleAssignment; import nl.knaw.dans.lib.dataverse.model.dataset.Dataset; import nl.knaw.dans.lib.dataverse.model.dataset.DatasetVersion; +import nl.knaw.dans.lib.dataverse.model.dataset.UpdateType; +import nl.knaw.dans.lib.dataverse.model.file.Checksum; +import nl.knaw.dans.lib.dataverse.model.file.DataFile; +import nl.knaw.dans.lib.dataverse.model.file.FileMeta; import nl.knaw.dans.lib.dataverse.model.user.AuthenticatedUser; import nl.knaw.dans.lib.util.ZipUtil; +import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.StringUtils; import org.joda.time.DateTime; import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; +import java.io.FileInputStream; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -59,6 +73,7 @@ @Slf4j public class DansBagMappingServiceImpl implements DansBagMappingService { + private static final String ORIGINAL_METADATA_ZIP = "original-metadata.zip"; private static final DateTimeFormatter yyyymmddPattern = DateTimeFormat.forPattern("YYYY-MM-dd"); private final DepositToDvDatasetMetadataMapper depositToDvDatasetMetadataMapper; @@ -119,6 +134,32 @@ else if (depositToDvDatasetMetadataMapper.isMigration()) { } } + @Override + public Init getInitFromDansDeposit(DansBagDeposit dansDeposit, boolean isUpdate) { + if (!isUpdate && depositToDvDatasetMetadataMapper.isMigration()) { + if (StringUtils.isBlank(dansDeposit.getDoi())) { + throw new IllegalArgumentException("Migration deposit must have a DOI"); + } + var create = new Create(); + var doi = dansDeposit.getDoi(); + if (!doi.startsWith("doi:")) { + doi = "doi:" + doi; + } + create.setImportPid(doi); + var init = new Init(); + init.setCreate(create); + return init; + } + else if (isUpdate) { + var expect = new Expect(); + expect.setState(State.released); + var init = new Init(); + init.setExpect(expect); + return 
init; + } + return null; + } + @Override public Dataset getDatasetMetadataFromDansDeposit(DansBagDeposit dansDeposit, DatasetVersion currentMetadata) { // TODO: rename to DatasetComposer en push the terms stuff into it as well. @@ -151,6 +192,14 @@ public Dataset getDatasetMetadataFromDansDeposit(DansBagDeposit dansDeposit, Dat @Override public EditFiles getEditFilesFromDansDeposit(DansBagDeposit dansDeposit, String updatesDataset) { var files = getFileInfo(dansDeposit); + if (!depositToDvDatasetMetadataMapper.isMigration()) { + try { + files.put(Path.of(ORIGINAL_METADATA_ZIP), createOriginalMetadataFileInfo(dansDeposit)); + } + catch (IOException e) { + throw new RuntimeException("Error creating original metadata zip", e); + } + } var dateAvailable = getDateAvailable(dansDeposit); if (updatesDataset == null) { return new EditFilesComposer(files, dateAvailable, fileExclusionPattern, embargoExclusions).composeEditFiles(); @@ -160,6 +209,24 @@ public EditFiles getEditFilesFromDansDeposit(DansBagDeposit dansDeposit, String } } + private FileInfo createOriginalMetadataFileInfo(DansBagDeposit dansDeposit) throws IOException { + var metadataDir = dansDeposit.getBagDir().resolve("metadata"); + var zipFile = dansDeposit.getBagDir().resolve("data/" + ORIGINAL_METADATA_ZIP); + ZipUtil.zipDirectory(metadataDir, zipFile, false); + var checksum = DigestUtils.sha1Hex(new FileInputStream(zipFile.toFile())); + var fileMeta = new FileMeta(); + fileMeta.setLabel(ORIGINAL_METADATA_ZIP); + var dataFile = new DataFile(); + dataFile.setFilename(ORIGINAL_METADATA_ZIP); + var dfChecksum = new Checksum(); + dfChecksum.setType("SHA-1"); + dfChecksum.setValue(checksum); + dataFile.setChecksum(dfChecksum); + fileMeta.setDataFile(dataFile); + fileMeta.setRestrict(false); + return new FileInfo(zipFile, checksum, false, fileMeta); + } + @Override public EditPermissions getEditPermissionsFromDansDeposit(DansBagDeposit dansDeposit, String updatesDataset) { if (updatesDataset == null) { @@ -177,12 
+244,25 @@ public EditPermissions getEditPermissionsFromDansDeposit(DansBagDeposit dansDepo } @Override - public String packageOriginalMetadata(DansBagDeposit dansDeposit) throws IOException { - // Zip the contents of the metadata directory of the bag - var metadataDir = dansDeposit.getBagDir().resolve("metadata"); - var zipFile = dansDeposit.getBagDir().resolve("data/original-metadata.zip"); - ZipUtil.zipDirectory(metadataDir, zipFile, false); - return zipFile.toString(); + public UpdateAction getUpdateActionFromDansDeposit(DansBagDeposit dansDeposit) { + if (depositToDvDatasetMetadataMapper.isMigration()) { + var amd = dansDeposit.getAmd(); + + if (amd == null) { + throw new RuntimeException(String.format("no AMD found for %s", dansDeposit.getDoi())); + } + + var date = Amd.toPublicationDate(amd); + + if (date.isEmpty()) { + throw new IllegalArgumentException(String.format("no publication date found in AMD for %s", dansDeposit.getDoi())); + } + + return new ReleaseMigratedAction(date.get()); + } + else { + return new PublishAction(UpdateType.major); + } } // todo: move to mapping package diff --git a/src/main/java/nl/knaw/dans/dvingest/core/dansbag/DansDepositConverter.java b/src/main/java/nl/knaw/dans/dvingest/core/dansbag/DansDepositConverter.java index 2c4e928..9336b93 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/dansbag/DansDepositConverter.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/dansbag/DansDepositConverter.java @@ -16,17 +16,24 @@ package nl.knaw.dans.dvingest.core.dansbag; import lombok.AllArgsConstructor; +import lombok.extern.slf4j.Slf4j; import nl.knaw.dans.dvingest.core.dansbag.deposit.DansBagDeposit; import nl.knaw.dans.dvingest.core.service.YamlService; import nl.knaw.dans.dvingest.core.yaml.EditFilesRoot; import nl.knaw.dans.dvingest.core.yaml.EditPermissionsRoot; -import nl.knaw.dans.dvingest.core.yaml.UpdateState; +import nl.knaw.dans.dvingest.core.yaml.InitRoot; +import nl.knaw.dans.dvingest.core.yaml.UpdateStateRoot; import 
nl.knaw.dans.lib.dataverse.model.dataset.DatasetVersion; +import org.apache.commons.io.FileUtils; import java.io.IOException; +import java.util.List; +@Slf4j @AllArgsConstructor public class DansDepositConverter { + private static final List YAML_FILES = List.of("init.yml", "dataset.yml", "edit-files.yml", "edit-permissions.yml", "update-state.yml"); + private final DansBagDeposit dansDeposit; private final String updatesDataset; private final DatasetVersion currentMetadata; @@ -34,8 +41,11 @@ public class DansDepositConverter { private final YamlService yamlService; public void run() throws IOException { - // TODO: pass to getEditFilesFromDansDeposit so that update-deposit can register it as a replaced file - var originalMetadataPath = mappingService.packageOriginalMetadata(dansDeposit); + deleteOldYamlFilesIfPresent(); + var init = mappingService.getInitFromDansDeposit(dansDeposit, updatesDataset != null); + if (init != null) { + yamlService.writeYaml(new InitRoot(init), dansDeposit.getBagDir().resolve("init.yml")); + } var dataset = mappingService.getDatasetMetadataFromDansDeposit(dansDeposit, currentMetadata); yamlService.writeYaml(dataset, dansDeposit.getBagDir().resolve("dataset.yml")); @@ -46,8 +56,20 @@ public void run() throws IOException { var editPermissions = mappingService.getEditPermissionsFromDansDeposit(dansDeposit, updatesDataset); yamlService.writeYaml(new EditPermissionsRoot(editPermissions), dansDeposit.getBagDir().resolve("edit-permissions.yml")); - var updateState = new UpdateState(); - updateState.setAction("publish-major"); - yamlService.writeYaml(updateState, dansDeposit.getBagDir().resolve("update-state.yml")); + var updateState = mappingService.getUpdateActionFromDansDeposit(dansDeposit); + yamlService.writeYaml(new UpdateStateRoot(updateState), dansDeposit.getBagDir().resolve("update-state.yml")); + } + + private void deleteOldYamlFilesIfPresent() { + log.debug("Starting with clean slate, deleting old YAML files if present"); + for 
(String file : YAML_FILES) { + var deleted = FileUtils.deleteQuietly(dansDeposit.getBagDir().resolve(file).toFile()); + if (deleted) { + log.debug("Deleted old YAML file: {}", file); + } + else { + log.debug("No old YAML file found or could not be deleted: {}", file); + } + } } } diff --git a/src/main/java/nl/knaw/dans/dvingest/core/dansbag/DansDepositSupport.java b/src/main/java/nl/knaw/dans/dvingest/core/dansbag/DansDepositSupport.java index cfc4bd7..ef1dfa4 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/dansbag/DansDepositSupport.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/dansbag/DansDepositSupport.java @@ -28,11 +28,15 @@ import nl.knaw.dans.dvingest.core.dansbag.exception.RejectedDepositException; import nl.knaw.dans.dvingest.core.service.DataverseService; import nl.knaw.dans.dvingest.core.service.YamlService; +import nl.knaw.dans.dvingest.core.yaml.PublishAction; +import nl.knaw.dans.dvingest.core.yaml.ReleaseMigratedAction; import nl.knaw.dans.lib.dataverse.DataverseException; import nl.knaw.dans.lib.dataverse.model.dataset.DatasetVersion; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.UUID; @@ -52,8 +56,8 @@ public class DansDepositSupport implements Deposit { public DansDepositSupport(DataverseIngestDeposit dataverseIngestDeposit, ValidateDansBagService validateDansBagService, DansBagMappingService dansBagMappingService, DataverseService dataverseService, YamlService yamlService) { - this.validateDansBagService = validateDansBagService; this.ingestDataverseIngestDeposit = dataverseIngestDeposit; + this.validateDansBagService = validateDansBagService; this.dansBagMappingService = dansBagMappingService; this.dataverseService = dataverseService; this.yamlService = yamlService; @@ -63,6 +67,12 @@ public DansDepositSupport(DataverseIngestDeposit dataverseIngestDeposit, Validat catch (IOException e) { throw new 
RuntimeException("Error reading bags", e); } + try { + Files.deleteIfExists(dataverseIngestDeposit.getBags().get(0).getDataDir().resolve("original-metadata.zip")); + } + catch (IOException e) { + throw new RuntimeException("Error deleting original-metadata.zip", e); + } } @Override @@ -112,39 +122,37 @@ public Path getLocation() { public void onSuccess(@NonNull String pid, String message) { try { var bag = ingestDataverseIngestDeposit.getBags().get(0); - var action = bag.getUpdateState().getAction(); - if (action.startsWith("publish")) { - try { - var nbn = dataverseService.getDatasetUrnNbn(pid); - ingestDataverseIngestDeposit.updateProperties(Map.of( - "state.label", "PUBLISHED", - "state.description", "The dataset is published", - "identifier.doi", pid, - "identifier.urn", nbn - ) - ); - } - catch (IOException | DataverseException e) { - throw new RuntimeException("Error getting URN:NBN", e); // Cancelling the "success" - } + var action = bag.getUpdateState(); + + if (action instanceof PublishAction) { + handlePublishAction(pid, false); } - else if (action.equals("submit-for-review")) { - ingestDataverseIngestDeposit.updateProperties(Map.of( - "state.label", "ACCEPTED", - "state.description", "The dataset is submitted for review", - "identifier.doi", pid - ) - ); + else if (action instanceof ReleaseMigratedAction) { + handlePublishAction(pid, true); } else { throw new RuntimeException("Unknown update action: " + action); } } - catch (IOException e) { - throw new RuntimeException("Error reading bag", e); + catch (IOException | ConfigurationException e) { + throw new RuntimeException("Error processing onSuccess", e); + } + } + + private void handlePublishAction(String pid, boolean isMigration) { + try { + var nbn = dataverseService.getDatasetUrnNbn(pid); + var newProps = new HashMap(); + newProps.put("state.label", "PUBLISHED"); + newProps.put("state.description", "The dataset is published"); + if (!isMigration) { + newProps.put("identifier.doi", pid); + 
newProps.put("identifier.urn", nbn); + } + ingestDataverseIngestDeposit.updateProperties(newProps); } - catch (ConfigurationException e) { - throw new RuntimeException(e); + catch (IOException | DataverseException e) { + throw new RuntimeException("Error getting URN:NBN", e); } } @@ -177,6 +185,7 @@ public void validate() { if (!result.getIsCompliant()) { throw new RejectedDepositException(ingestDataverseIngestDeposit, objectMapper.writeValueAsString(result)); } + log.debug("Validation successful. Bag is compliant."); } catch (IOException e) { throw new RuntimeException(e); diff --git a/src/main/java/nl/knaw/dans/dvingest/core/service/DataverseService.java b/src/main/java/nl/knaw/dans/dvingest/core/service/DataverseService.java index 8f8c36c..7475819 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/service/DataverseService.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/service/DataverseService.java @@ -74,4 +74,10 @@ public interface DataverseService { List findDoiByMetadataField(String fieldName, String value) throws IOException, DataverseException; DatasetVersion getDatasetMetadata(String pid) throws IOException, DataverseException; + + String getDatasetState(String targetPid) throws IOException, DataverseException; + + void importDataset(String pid, Dataset dataset) throws IOException, DataverseException; + + void releaseMigratedDataset(String pid, String date) throws DataverseException, IOException; } diff --git a/src/main/java/nl/knaw/dans/dvingest/core/service/DataverseServiceImpl.java b/src/main/java/nl/knaw/dans/dvingest/core/service/DataverseServiceImpl.java index 4434189..694adce 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/service/DataverseServiceImpl.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/service/DataverseServiceImpl.java @@ -211,6 +211,26 @@ public DatasetVersion getDatasetMetadata(String pid) throws IOException, Dataver return dataverseClient.dataset(pid).getVersion().getData(); } + @Override + public String 
getDatasetState(String pid) throws IOException, DataverseException { + return dataverseClient.dataset(pid).getVersion(Version.LATEST.toString(), true).getData().getVersionState(); + } + + @Override + public void importDataset(String pid, Dataset dataset) throws IOException, DataverseException { + log.debug("Start importing dataset for deposit {}", pid); + var result = dataverseClient.dataverse("root").importDataset(dataset, pid, false, metadataKeys); + log.debug(result.getEnvelopeAsString()); + log.debug("End importing dataset for deposit {}", pid); + } + + @Override + public void releaseMigratedDataset(String pid, String date) throws DataverseException, IOException { + var datePublishJsonLd = String.format("{\"http://schema.org/datePublished\": \"%s\"}", date); + var result = dataverseClient.dataset(pid).releaseMigrated(datePublishJsonLd, true); + log.debug(result.getEnvelopeAsString()); + } + // TODO: move this to dans-dataverse-client-lib; it is similar to awaitLockState. public void waitForState(String datasetId, String expectedState) { var numberOfTimesTried = 0; @@ -242,10 +262,4 @@ public void waitForState(String datasetId, String expectedState) { throw new RuntimeException(e); } } - - private String getDatasetState(String datasetId) throws IOException, DataverseException { - var version = dataverseClient.dataset(datasetId).getVersion(Version.LATEST.toString(), true); - return version.getData().getVersionState(); - - } } diff --git a/src/main/java/nl/knaw/dans/dvingest/core/service/UtilityServices.java b/src/main/java/nl/knaw/dans/dvingest/core/service/UtilityServices.java index 904e217..2d4428d 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/service/UtilityServices.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/service/UtilityServices.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.nio.file.Path; import java.util.Map; +import java.util.Optional; public interface UtilityServices { @@ -28,4 +29,6 @@ public interface UtilityServices { 
PathIteratorZipperBuilder createPathIteratorZipperBuilder(); PathIteratorZipperBuilder createPathIteratorZipperBuilder(Map renameMap); + + Optional wrapIfZipFile(Path path) throws IOException; } diff --git a/src/main/java/nl/knaw/dans/dvingest/core/service/UtilityServicesImpl.java b/src/main/java/nl/knaw/dans/dvingest/core/service/UtilityServicesImpl.java index feecf7b..e33fed0 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/service/UtilityServicesImpl.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/service/UtilityServicesImpl.java @@ -16,19 +16,34 @@ package nl.knaw.dans.dvingest.core.service; import lombok.Builder; +import lombok.extern.slf4j.Slf4j; +import net.lingala.zip4j.ZipFile; +import net.lingala.zip4j.model.ZipParameters; +import net.lingala.zip4j.model.enums.CompressionMethod; import nl.knaw.dans.lib.util.PathIteratorZipper; import nl.knaw.dans.lib.util.PathIteratorZipper.PathIteratorZipperBuilder; +import org.apache.tika.Tika; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.UUID; +@Slf4j @Builder public class UtilityServicesImpl implements UtilityServices { + private final Set needToBeZipWrapped = Set.of( + "application/zip", + "application/zipped-shapefile", + "application/fits-gzipped" + ); private final Path tempDir; private final int maxNumberOfFilesPerUpload; private final long maxUploadSize; + private final Tika tika = new Tika(); @Override public Path createTempZipFile() throws IOException { @@ -52,4 +67,51 @@ public PathIteratorZipperBuilder createPathIteratorZipperBuilder(Map wrapIfZipFile(Path path) throws IOException { + if (needsToBeWrapped(path)) { + var filename = Optional.ofNullable(path.getFileName()) + .map(Path::toString) + .orElse(""); + + var randomName = String.format("zip-wrapped-%s-%s.zip", + filename, UUID.randomUUID()); + + var tempFile = tempDir.resolve(randomName); + + try (var zip = new 
ZipFile(tempFile.toFile())) { + zip.addFile(path.toFile(), zipWithoutCompressing()); + } + + return Optional.of(tempFile); + } + else { + return Optional.empty(); + } + } + + private ZipParameters zipWithoutCompressing() { + var params = new ZipParameters(); + params.setCompressionMethod(CompressionMethod.STORE); + return params; + } + + private boolean needsToBeWrapped(Path path) throws IOException { + var endsWithZip = Optional.ofNullable(path.getFileName()) + .map(Path::toString) + .map(x -> x.endsWith(".zip")) + .orElse(false); + + log.debug("Checking if path {} needs to be wrapped: endsWithZip={}", path, endsWithZip); + + return endsWithZip || needToBeZipWrapped.contains(getMimeType(path)); + } + + private String getMimeType(Path path) throws IOException { + String result = tika.detect(path); + log.debug("MimeType of path {} is {}", path, result); + return result; + } + } diff --git a/src/main/java/nl/knaw/dans/dvingest/core/service/YamlServiceImpl.java b/src/main/java/nl/knaw/dans/dvingest/core/service/YamlServiceImpl.java index dfaed55..69bc0b6 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/service/YamlServiceImpl.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/service/YamlServiceImpl.java @@ -25,10 +25,12 @@ import io.dropwizard.configuration.YamlConfigurationFactory; import lombok.Data; import lombok.extern.slf4j.Slf4j; +import nl.knaw.dans.dvingest.core.yaml.DataverseIngestModule; import nl.knaw.dans.dvingest.core.yaml.EditFilesRoot; import nl.knaw.dans.dvingest.core.yaml.EditMetadataRoot; import nl.knaw.dans.dvingest.core.yaml.EditPermissionsRoot; -import nl.knaw.dans.dvingest.core.yaml.UpdateState; +import nl.knaw.dans.dvingest.core.yaml.InitRoot; +import nl.knaw.dans.dvingest.core.yaml.UpdateStateRoot; import nl.knaw.dans.lib.dataverse.MetadataFieldDeserializer; import nl.knaw.dans.lib.dataverse.model.dataset.Dataset; import nl.knaw.dans.lib.dataverse.model.dataset.MetadataField; @@ -69,11 +71,13 @@ public YamlServiceImpl() { 
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true); mapper.addMixIn(FileMeta.class, FileMetaMixin.class); mapper.registerModule(module); + mapper.registerModule(new DataverseIngestModule()); + yamlConfigurationFactories.put(InitRoot.class, new YamlConfigurationFactory<>(InitRoot.class, factory.getValidator(), mapper, "dw")); yamlConfigurationFactories.put(Dataset.class, new YamlConfigurationFactory<>(Dataset.class, factory.getValidator(), mapper, "dw")); yamlConfigurationFactories.put(EditFilesRoot.class, new YamlConfigurationFactory<>(EditFilesRoot.class, factory.getValidator(), mapper, "dw")); yamlConfigurationFactories.put(EditMetadataRoot.class, new YamlConfigurationFactory<>(EditMetadataRoot.class, factory.getValidator(), mapper, "dw")); yamlConfigurationFactories.put(EditPermissionsRoot.class, new YamlConfigurationFactory<>(EditPermissionsRoot.class, factory.getValidator(), mapper, "dw")); - yamlConfigurationFactories.put(UpdateState.class, new YamlConfigurationFactory<>(UpdateState.class, factory.getValidator(), mapper, "dw")); + yamlConfigurationFactories.put(UpdateStateRoot.class, new YamlConfigurationFactory<>(UpdateStateRoot.class, factory.getValidator(), mapper, "dw")); } catch (Throwable e) { // This ctor is called from a static context, so we log the error to make sure it is not lost diff --git a/src/main/java/nl/knaw/dans/dvingest/core/yaml/Create.java b/src/main/java/nl/knaw/dans/dvingest/core/yaml/Create.java new file mode 100644 index 0000000..d3773d9 --- /dev/null +++ b/src/main/java/nl/knaw/dans/dvingest/core/yaml/Create.java @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nl.knaw.dans.dvingest.core.yaml; + +import lombok.Data; + +@Data +public class Create { + private String importPid; +} diff --git a/src/main/java/nl/knaw/dans/dvingest/core/yaml/DataverseIngestModule.java b/src/main/java/nl/knaw/dans/dvingest/core/yaml/DataverseIngestModule.java new file mode 100644 index 0000000..0aad875 --- /dev/null +++ b/src/main/java/nl/knaw/dans/dvingest/core/yaml/DataverseIngestModule.java @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package nl.knaw.dans.dvingest.core.yaml; + +import com.fasterxml.jackson.databind.module.SimpleModule; + +public class DataverseIngestModule extends SimpleModule { + public DataverseIngestModule() { + addDeserializer(UpdateStateRoot.class, new UpdateStateRootDeserializer(UpdateStateRoot.class)); + addSerializer(UpdateStateRoot.class, new UpdateStateRootSerializer(UpdateStateRoot.class)); + } +} diff --git a/src/main/java/nl/knaw/dans/dvingest/core/yaml/Expect.java b/src/main/java/nl/knaw/dans/dvingest/core/yaml/Expect.java new file mode 100644 index 0000000..1211ba1 --- /dev/null +++ b/src/main/java/nl/knaw/dans/dvingest/core/yaml/Expect.java @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package nl.knaw.dans.dvingest.core.yaml; + +import io.dropwizard.util.Duration; +import lombok.Data; + +@Data +public class Expect { + public enum State { + draft, released, absent + } + + private State state; +} diff --git a/src/main/java/nl/knaw/dans/dvingest/core/yaml/Init.java b/src/main/java/nl/knaw/dans/dvingest/core/yaml/Init.java new file mode 100644 index 0000000..1025b0a --- /dev/null +++ b/src/main/java/nl/knaw/dans/dvingest/core/yaml/Init.java @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nl.knaw.dans.dvingest.core.yaml; + +import lombok.Data; + +@Data +public class Init { + private Expect expect = new Expect(); + private Create create; +} diff --git a/src/main/java/nl/knaw/dans/dvingest/core/yaml/InitRoot.java b/src/main/java/nl/knaw/dans/dvingest/core/yaml/InitRoot.java new file mode 100644 index 0000000..7cb3d8e --- /dev/null +++ b/src/main/java/nl/knaw/dans/dvingest/core/yaml/InitRoot.java @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nl.knaw.dans.dvingest.core.yaml; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@AllArgsConstructor +@NoArgsConstructor +public class InitRoot { + private Init init; +} diff --git a/src/main/java/nl/knaw/dans/dvingest/core/yaml/PublishAction.java b/src/main/java/nl/knaw/dans/dvingest/core/yaml/PublishAction.java new file mode 100644 index 0000000..30b6ad6 --- /dev/null +++ b/src/main/java/nl/knaw/dans/dvingest/core/yaml/PublishAction.java @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package nl.knaw.dans.dvingest.core.yaml; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import nl.knaw.dans.lib.dataverse.model.dataset.UpdateType; + +@Data +@AllArgsConstructor +@NoArgsConstructor +public class PublishAction implements UpdateAction { + private UpdateType updateType; +} diff --git a/src/main/java/nl/knaw/dans/dvingest/core/yaml/ReleaseMigratedAction.java b/src/main/java/nl/knaw/dans/dvingest/core/yaml/ReleaseMigratedAction.java new file mode 100644 index 0000000..7e205b9 --- /dev/null +++ b/src/main/java/nl/knaw/dans/dvingest/core/yaml/ReleaseMigratedAction.java @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package nl.knaw.dans.dvingest.core.yaml; + +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.util.regex.Pattern; + +@Data +@NoArgsConstructor +public class ReleaseMigratedAction implements UpdateAction { + private static final Pattern DATE_PATTERN = Pattern.compile("\\d{4}-\\d{2}-\\d{2}"); + + private String releaseDate; + + public ReleaseMigratedAction(String releaseDate) { + setReleaseDate(releaseDate); + } + + public void setReleaseDate(String releaseDate) { + if (!DATE_PATTERN.matcher(releaseDate).matches()) { + throw new IllegalArgumentException("Release date must be in the format YYYY-MM-DD"); + } + this.releaseDate = releaseDate; + } +} \ No newline at end of file diff --git a/src/main/java/nl/knaw/dans/dvingest/core/yaml/UpdateAction.java b/src/main/java/nl/knaw/dans/dvingest/core/yaml/UpdateAction.java new file mode 100644 index 0000000..9786a30 --- /dev/null +++ b/src/main/java/nl/knaw/dans/dvingest/core/yaml/UpdateAction.java @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package nl.knaw.dans.dvingest.core.yaml; + +public interface UpdateAction { +} diff --git a/src/main/java/nl/knaw/dans/dvingest/core/yaml/UpdateState.java b/src/main/java/nl/knaw/dans/dvingest/core/yaml/UpdateState.java index a6629f4..084a94e 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/yaml/UpdateState.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/yaml/UpdateState.java @@ -17,8 +17,9 @@ import lombok.Data; +import java.util.Map; + @Data public class UpdateState { - // TODO: convert to enum - private String action; + private Map action; } diff --git a/src/main/java/nl/knaw/dans/dvingest/core/yaml/UpdateStateRoot.java b/src/main/java/nl/knaw/dans/dvingest/core/yaml/UpdateStateRoot.java new file mode 100644 index 0000000..1e74774 --- /dev/null +++ b/src/main/java/nl/knaw/dans/dvingest/core/yaml/UpdateStateRoot.java @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package nl.knaw.dans.dvingest.core.yaml; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.util.Map; + +@Data +@AllArgsConstructor +@NoArgsConstructor +public class UpdateStateRoot { + private UpdateAction updateState; +} diff --git a/src/main/java/nl/knaw/dans/dvingest/core/yaml/UpdateStateRootDeserializer.java b/src/main/java/nl/knaw/dans/dvingest/core/yaml/UpdateStateRootDeserializer.java new file mode 100644 index 0000000..d5f0e90 --- /dev/null +++ b/src/main/java/nl/knaw/dans/dvingest/core/yaml/UpdateStateRootDeserializer.java @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package nl.knaw.dans.dvingest.core.yaml; + +import com.fasterxml.jackson.core.JacksonException; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.deser.std.StdDeserializer; +import com.fasterxml.jackson.databind.node.ValueNode; +import nl.knaw.dans.lib.dataverse.model.dataset.UpdateType; + +import java.io.IOException; +import java.time.Instant; + +public class UpdateStateRootDeserializer extends StdDeserializer { + + protected UpdateStateRootDeserializer(Class vc) { + super(vc); + } + + @Override + public UpdateStateRoot deserialize(JsonParser p, DeserializationContext ctxt) throws IOException, JacksonException { + var node = p.getCodec().readTree(p); + var updateActionInfo = node.get("updateState"); + if (updateActionInfo == null) { + throw new IllegalArgumentException("No updateState found in the yaml"); + } + var publishAction = updateActionInfo.get("publish"); + var releaseMigratedAction = updateActionInfo.get("releaseMigrated"); + + if ((publishAction == null) == (releaseMigratedAction == null)) { + throw new IllegalArgumentException("Exactly one of publish or releaseMigrated must be set"); + } + + if (publishAction != null) { + ValueNode publishActionValue = (ValueNode) publishAction; + return new UpdateStateRoot(new PublishAction(UpdateType.valueOf(publishActionValue.asText().toLowerCase()))); + } + else { + ValueNode releaseMigratedActionValue = (ValueNode) releaseMigratedAction; + return new UpdateStateRoot(new ReleaseMigratedAction(releaseMigratedActionValue.asText())); + } + } +} diff --git a/src/main/java/nl/knaw/dans/dvingest/core/yaml/UpdateStateRootSerializer.java b/src/main/java/nl/knaw/dans/dvingest/core/yaml/UpdateStateRootSerializer.java new file mode 100644 index 0000000..3b8669b --- /dev/null +++ b/src/main/java/nl/knaw/dans/dvingest/core/yaml/UpdateStateRootSerializer.java @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving 
and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nl.knaw.dans.dvingest.core.yaml; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.ser.std.StdSerializer; + +import java.io.IOException; + +public class UpdateStateRootSerializer extends StdSerializer { + protected UpdateStateRootSerializer(Class t) { + super(t); + } + + @Override + public void serialize(UpdateStateRoot value, JsonGenerator gen, SerializerProvider provider) throws IOException { + gen.writeStartObject(); // begin root + gen.writeFieldName("updateState"); // begin updateState + gen.writeStartObject(); + if (value.getUpdateState() instanceof PublishAction publishAction) { + gen.writeStringField("publish", publishAction.getUpdateType().toString()); + } + else if (value.getUpdateState() instanceof ReleaseMigratedAction releaseMigratedAction) { + gen.writeStringField("releaseMigrated", releaseMigratedAction.getReleaseDate().toString()); + } + gen.writeEndObject(); // end updateState + gen.writeEndObject(); // end root + } +} diff --git a/src/test/java/nl/knaw/dans/dvingest/core/bagprocessor/DatasetVersionCreatorTest.java b/src/test/java/nl/knaw/dans/dvingest/core/bagprocessor/DatasetVersionCreatorTest.java index 87401fa..735a146 100644 --- a/src/test/java/nl/knaw/dans/dvingest/core/bagprocessor/DatasetVersionCreatorTest.java +++ 
b/src/test/java/nl/knaw/dans/dvingest/core/bagprocessor/DatasetVersionCreatorTest.java @@ -38,7 +38,7 @@ public void createDatasetVersion_creates_a_new_dataset_if_targetPid_is_null() th // Given var depositId = UUID.randomUUID(); var dataset = new Dataset(); - DatasetVersionCreator datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseServiceMock, dataset); + DatasetVersionCreator datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseServiceMock, null, dataset); // When datasetVersionCreator.createDatasetVersion(null); @@ -53,7 +53,7 @@ public void createDatasetVersion_updates_the_dataset_if_targetPid_is_not_null() // Given var depositId = UUID.randomUUID(); var dataset = new Dataset(); - DatasetVersionCreator datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseServiceMock, dataset); + DatasetVersionCreator datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseServiceMock, null, dataset); // When datasetVersionCreator.createDatasetVersion("pid"); @@ -67,7 +67,7 @@ public void createDatasetVersion_updates_the_dataset_if_targetPid_is_not_null() public void createDatasetVersion_throws_IllegalArgumentException_if_dataset_is_null() { // Given var depositId = UUID.randomUUID(); - DatasetVersionCreator datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseServiceMock, null); + DatasetVersionCreator datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseServiceMock, null, null); // When // Then @@ -80,7 +80,7 @@ public void createDatasetVersion_throws_IllegalArgumentException_if_dataset_is_n public void createDatasetVersion_is_noop_if_dataset_is_null_and_targetPid_is_not_null() throws Exception { // Given var depositId = UUID.randomUUID(); - DatasetVersionCreator datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseServiceMock, null); + DatasetVersionCreator datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseServiceMock, null, null); // When 
datasetVersionCreator.createDatasetVersion("pid"); @@ -96,14 +96,14 @@ public void ctor_throws_NullPointerException_if_dataverseService_is_null() { var depositId = UUID.randomUUID(); // When // Then - assertThatThrownBy(() -> new DatasetVersionCreator(depositId, null, new Dataset())) + assertThatThrownBy(() -> new DatasetVersionCreator(depositId, null, null, new Dataset())) .isInstanceOf(NullPointerException.class); } // Throws NullPointerException if dataverseService is null @Test public void ctor_throws_NullPointerException_if_depositId_is_null() { - assertThatThrownBy(() -> new DatasetVersionCreator(null, dataverseServiceMock, new Dataset())) + assertThatThrownBy(() -> new DatasetVersionCreator(null, dataverseServiceMock, null, new Dataset())) .isInstanceOf(NullPointerException.class); } } diff --git a/src/test/java/nl/knaw/dans/dvingest/core/yaml/ExpectTest.java b/src/test/java/nl/knaw/dans/dvingest/core/yaml/ExpectTest.java new file mode 100644 index 0000000..7e54338 --- /dev/null +++ b/src/test/java/nl/knaw/dans/dvingest/core/yaml/ExpectTest.java @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package nl.knaw.dans.dvingest.core.yaml; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.exc.InvalidFormatException; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import nl.knaw.dans.dvingest.core.yaml.Expect.State; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +public class ExpectTest { + private static final ObjectMapper MAPPER = new ObjectMapper(new YAMLFactory()); + + @Test + public void existing_state_is_parsed_correctly() throws Exception { + var yaml = "state: draft"; + var expect = MAPPER.readValue(yaml, Expect.class); + + assertThat(expect.getState()).isEqualTo(State.draft); + } + + @Test + public void invalid_state_throw_exception() { + var yaml = "state: ambiguous"; + assertThatThrownBy(() -> MAPPER.readValue(yaml, Expect.class)) + .isInstanceOf(InvalidFormatException.class) + .hasMessageContaining("not one of the values accepted for Enum class: [draft, released, absent]"); + } + +} diff --git a/src/test/java/nl/knaw/dans/dvingest/core/yaml/UpdateStateTest.java b/src/test/java/nl/knaw/dans/dvingest/core/yaml/UpdateStateTest.java new file mode 100644 index 0000000..3127169 --- /dev/null +++ b/src/test/java/nl/knaw/dans/dvingest/core/yaml/UpdateStateTest.java @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nl.knaw.dans.dvingest.core.yaml; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import nl.knaw.dans.lib.dataverse.model.dataset.UpdateType; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +public class UpdateStateTest { + private ObjectMapper mapper; + + @BeforeEach + public void setUp() { + mapper = new ObjectMapper(new YAMLFactory()); + mapper.registerModule(new DataverseIngestModule()); + } + + @Test + public void publish_action_is_deserialized_correctly() throws Exception { + var yaml = """ + updateState: + publish: major + """; + + var updateState = mapper.readValue(yaml, UpdateStateRoot.class); + assertThat(updateState.getUpdateState()).isInstanceOf(PublishAction.class); + assertThat(((PublishAction) updateState.getUpdateState()).getUpdateType()).isEqualTo(UpdateType.major); + } + + @Test + public void publish_action_is_serialized_correctly() throws Exception { + var updateState = new UpdateStateRoot(new PublishAction(UpdateType.major)); + var yaml = mapper.writeValueAsString(updateState); + assertThat(yaml.trim()).isEqualTo(""" + --- + updateState: + publish: "major" + """.trim()); + } + + @Test + public void release_migrated_action_is_deserialized_correctly() throws Exception { + var yaml = """ + updateState: + releaseMigrated: 2021-09-01 + """; + + var updateState = mapper.readValue(yaml, UpdateStateRoot.class); + assertThat(updateState.getUpdateState()).isInstanceOf(ReleaseMigratedAction.class); + assertThat(((ReleaseMigratedAction) updateState.getUpdateState()).getReleaseDate()).isEqualTo("2021-09-01"); + } + + @Test + public void release_date_must_follow_yyyy_mm_dd_pattern() { + var yaml = """ + updateState: +
releaseMigrated: 2021-09-01T00:00:00Z + """; + + assertThatThrownBy(() -> mapper.readValue(yaml, UpdateStateRoot.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Release date must be in the format YYYY-MM-DD"); + } + + @Test + public void release_migrated_action_is_serialized_correctly() throws Exception { + var updateState = new UpdateStateRoot(new ReleaseMigratedAction("2021-09-01")); + var yaml = mapper.writeValueAsString(updateState); + assertThat(yaml.trim()).isEqualTo(""" + --- + updateState: + releaseMigrated: "2021-09-01" + """.trim()); + } + + @Test + public void both_publish_and_release_migrated_actions_are_not_allowed() { + var yaml = """ + updateState: + publish: major + releaseMigrated: 2021-09-01 + """; + + assertThatThrownBy(() -> mapper.readValue(yaml, UpdateStateRoot.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Exactly one of publish or releaseMigrated must be set"); + } + + @Test + public void omitting_both_publish_and_release_migrated_actions_is_not_allowed() { + var yaml = """ + updateState: {} + """; + + assertThatThrownBy(() -> mapper.readValue(yaml, UpdateStateRoot.class)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Exactly one of publish or releaseMigrated must be set"); + } + + +} diff --git a/src/test/resources/test-deposits/072625c6-c2a8-43a6-9f35-f49b2db9435c/1/init.yml b/src/test/resources/test-deposits/072625c6-c2a8-43a6-9f35-f49b2db9435c/1/init.yml new file mode 100644 index 0000000..9c1bbd5 --- /dev/null +++ b/src/test/resources/test-deposits/072625c6-c2a8-43a6-9f35-f49b2db9435c/1/init.yml @@ -0,0 +1,3 @@ +init: + create: + importPid: 'doi:10.5072/FK2/8QZQZM' diff --git a/src/test/resources/test-deposits/072625c6-c2a8-43a6-9f35-f49b2db9435c/2/init.yml b/src/test/resources/test-deposits/072625c6-c2a8-43a6-9f35-f49b2db9435c/2/init.yml new file mode 100644 index 0000000..f25cf9b --- /dev/null +++
b/src/test/resources/test-deposits/072625c6-c2a8-43a6-9f35-f49b2db9435c/2/init.yml @@ -0,0 +1,3 @@ +init: + expect: + state: 'released'