MET-5966: Align with latest harvesting changes in metis framework. #160

Merged
pom.xml (2 changes: 1 addition & 1 deletion)
@@ -32,7 +32,7 @@
   <properties>
     <version.java>21</version.java>
     <version.springdoc-openapi-starter-webmvc-ui>2.3.0</version.springdoc-openapi-starter-webmvc-ui>
-    <version.metis>12.2</version.metis>
+    <version.metis>13-SNAPSHOT</version.metis>
     <version.metis.normalization>13</version.metis.normalization>
     <version.spring.boot>3.2.3</version.spring.boot>
     <version.europeana>2.16.7</version.europeana>
@@ -163,7 +163,7 @@ public DatasetIdDto harvestDatasetFromFile(
     DatasetMetadata datasetMetadata = DatasetMetadata.builder().withDatasetId(createdDatasetId)
         .withDatasetName(datasetName).withCountry(country).withLanguage(language)
         .withStepSize(stepsize).build();
-    harvestPublishService.runHarvestFileAsync(dataset, datasetMetadata, compressedFileExtension)
+    harvestPublishService.runHarvestProvidedFileAsync(dataset, datasetMetadata, compressedFileExtension)
         .exceptionally(e -> datasetLogService.logException(createdDatasetId, e));
     return new DatasetIdDto(createdDatasetId);
   }
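
For orientation, a minimal sketch of how the renamed method could be exercised from a test, assuming Spring's MockMultipartFile from spring-test; zipBytes, country, language, and the ZIP constant are illustrative assumptions, not taken from this PR:

    import org.springframework.mock.web.MockMultipartFile;
    import org.springframework.web.multipart.MultipartFile;

    // Hypothetical invocation of the renamed method; fixture values are assumptions.
    MultipartFile dataset = new MockMultipartFile(
        "dataset", "records.zip", "application/zip", zipBytes);
    DatasetMetadata datasetMetadata = DatasetMetadata.builder()
        .withDatasetId("42").withDatasetName("demo-dataset")
        .withCountry(country).withLanguage(language)
        .withStepSize(1).build();
    harvestPublishService.runHarvestProvidedFileAsync(dataset, datasetMetadata, CompressedFileExtension.ZIP)
        .exceptionally(e -> datasetLogService.logException("42", e))
        .join(); // block in a test so assertions can run after the harvest completes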
@@ -20,7 +20,7 @@ public interface HarvestPublishService {
    * if it reached the max number of records
    * @throws ServiceException if the file is not valid, there is an error reading the file, or the records are empty
    */
-  CompletableFuture<Void> runHarvestFileAsync(MultipartFile file, DatasetMetadata datasetMetadata, CompressedFileExtension compressedFileExtension);
+  CompletableFuture<Void> runHarvestProvidedFileAsync(MultipartFile file, DatasetMetadata datasetMetadata, CompressedFileExtension compressedFileExtension);

   /**
    * Start the harvest of a URL asynchronously on the given URL {@link String}
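
As context for the contract above: an exception thrown inside the asynchronous task does not propagate to the caller directly; it completes the returned future exceptionally. A self-contained, JDK-only sketch of that behaviour (the exception message is illustrative):

    import java.util.concurrent.CompletableFuture;

    public class AsyncFailureDemo {
      public static void main(String[] args) {
        CompletableFuture<Void> future = CompletableFuture.runAsync(() -> {
          // stand-in for the ServiceException described in the Javadoc above
          throw new IllegalStateException("file is not valid");
        });
        // exceptionally() receives a CompletionException wrapping the original cause
        future.exceptionally(e -> {
          System.out.println("logged: " + e.getCause().getMessage());
          return null;
        }).join();
      }
    }

This is why the controller chains .exceptionally(e -> datasetLogService.logException(...)) rather than wrapping the call in a try/catch.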
@@ -43,17 +43,12 @@
   }

   @Override
-  public CompletableFuture<Void> runHarvestFileAsync(MultipartFile file, DatasetMetadata datasetMetadata,
+  public CompletableFuture<Void> runHarvestProvidedFileAsync(MultipartFile file, DatasetMetadata datasetMetadata,
       CompressedFileExtension compressedFileExtension) {
     try {
-      Record.RecordBuilder recordDataEncapsulated = Record.builder()
-          .datasetId(datasetMetadata.getDatasetId())
-          .datasetName(datasetMetadata.getDatasetName())
-          .country(datasetMetadata.getCountry())
-          .language(datasetMetadata.getLanguage());
       harvestingParameterService.createDatasetHarvestingParameters(datasetMetadata.getDatasetId(),
           new FileHarvestingDto(file.getOriginalFilename(), compressedFileExtension.name()));
-      return runHarvestFileAsync(file.getInputStream(), recordDataEncapsulated, datasetMetadata, compressedFileExtension);
+      return runHarvestFileAsync(file.getInputStream(), datasetMetadata, compressedFileExtension);
     } catch (IOException e) {
       throw new ServiceException("Error harvesting records from file " + file.getName(), e);
     }
@@ -62,43 +57,40 @@
   @Override
   public CompletableFuture<Void> runHarvestHttpFileAsync(String url, DatasetMetadata datasetMetadata,
       CompressedFileExtension compressedFileExtension) {
-    Record.RecordBuilder recordDataEncapsulated = Record.builder()
-        .datasetId(datasetMetadata.getDatasetId())
-        .datasetName(datasetMetadata.getDatasetName())
-        .country(datasetMetadata.getCountry())
-        .language(datasetMetadata.getLanguage());
     harvestingParameterService.createDatasetHarvestingParameters(datasetMetadata.getDatasetId(), new HttpHarvestingDto(url));
-    return CompletableFuture.runAsync(() -> {
-      try (InputStream input = new URI(url).toURL().openStream()) {
-        harvestService.harvest(input, datasetMetadata.getDatasetId(), recordDataEncapsulated,
-            datasetMetadata.getStepSize(), compressedFileExtension);
-      } catch (UnknownHostException e) {
-        throw new ServiceException(HARVESTING_ERROR_MESSAGE + datasetMetadata.getDatasetId()
-            + " - unknown host: " + e.getMessage());
-      } catch (IOException | URISyntaxException | HarvesterException e) {
-        throw new ServiceException(HARVESTING_ERROR_MESSAGE + datasetMetadata.getDatasetId(), e);
-      }
-    }, asyncServiceTaskExecutor);
+    try {
+      final InputStream inputStreamToHarvest = new URI(url).toURL().openStream();
       [Code scanning / SonarCloud warning (Medium): "Server-side requests should not be vulnerable to forging attacks. Change this code to not construct the URL from user-controlled data."]
+      return runHarvestFileAsync(inputStreamToHarvest, datasetMetadata, compressedFileExtension);
+    } catch (UnknownHostException e) {
+      throw new ServiceException(HARVESTING_ERROR_MESSAGE + datasetMetadata.getDatasetId()
+          + " - unknown host: " + e.getMessage());
+    } catch (IOException | URISyntaxException e) {
+      throw new ServiceException(HARVESTING_ERROR_MESSAGE + datasetMetadata.getDatasetId(), e);
+    }
   }

   private CompletableFuture<Void> runHarvestFileAsync(InputStream inputStreamToHarvest,
-      Record.RecordBuilder recordDataEncapsulated,
-      DatasetMetadata datasetMetadata,
-      CompressedFileExtension compressedFileExtension) {
+      DatasetMetadata datasetMetadata, CompressedFileExtension compressedFileExtension) {
     return CompletableFuture.runAsync(() -> {
+      final Record.RecordBuilder recordDataEncapsulated = Record.builder()
+          .datasetId(datasetMetadata.getDatasetId())
+          .datasetName(datasetMetadata.getDatasetName())
+          .country(datasetMetadata.getCountry())
+          .language(datasetMetadata.getLanguage());
       try {
-        harvestService.harvest(inputStreamToHarvest, datasetMetadata.getDatasetId(), recordDataEncapsulated,
+        harvestService.harvestFromCompressedArchive(inputStreamToHarvest,
+            datasetMetadata.getDatasetId(), recordDataEncapsulated,
             datasetMetadata.getStepSize(), compressedFileExtension);
       } catch (HarvesterException e) {
         throw new ServiceException(HARVESTING_ERROR_MESSAGE + datasetMetadata.getDatasetId(), e);
+      } finally {
+        try {
+          inputStreamToHarvest.close();
+        } catch (IOException e) {
+          LOGGER.warn("Could not close input stream", e);
+        }
       }
-    }, asyncServiceTaskExecutor).whenComplete((result, exception) -> {
-      try {
-        inputStreamToHarvest.close();
-      } catch (IOException e) {
-        LOGGER.warn("Could not close input stream", e);
-      }
-    });
+    }, asyncServiceTaskExecutor);
   }

@Override
@@ -113,7 +105,7 @@
         new OAIPmhHarvestingDto(oaiHarvestData.getUrl(), oaiHarvestData.getSetspec(),
             oaiHarvestData.getMetadataformat()));
     return CompletableFuture.runAsync(
-        () -> harvestService.harvestOaiPmh(datasetMetadata.getDatasetId(), recordDataEncapsulated, oaiHarvestData,
+        () -> harvestService.harvestFromOaiPmh(datasetMetadata.getDatasetId(), recordDataEncapsulated, oaiHarvestData,
             datasetMetadata.getStepSize()), asyncServiceTaskExecutor);
   }
 }
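
On the SonarCloud finding flagged above: a common mitigation is to validate the user-supplied URL against an allow-list before opening a connection. A minimal sketch, assuming a hypothetical validator whose scheme and host values are placeholders, not part of this PR:

    import java.net.URI;
    import java.net.URISyntaxException;
    import java.util.Locale;
    import java.util.Set;

    // Hypothetical validator; in practice ALLOWED_HOSTS would come from configuration.
    public final class HarvestUrlValidator {

      private static final Set<String> ALLOWED_SCHEMES = Set.of("http", "https");
      private static final Set<String> ALLOWED_HOSTS = Set.of("data.example.org");

      static URI validate(String url) {
        try {
          URI uri = new URI(url);
          String scheme = uri.getScheme();
          String host = uri.getHost();
          if (scheme == null || host == null
              || !ALLOWED_SCHEMES.contains(scheme.toLowerCase(Locale.ROOT))
              || !ALLOWED_HOSTS.contains(host.toLowerCase(Locale.ROOT))) {
            throw new IllegalArgumentException("URL not allowed: " + url);
          }
          return uri;
        } catch (URISyntaxException e) {
          throw new IllegalArgumentException("Malformed URL: " + url, e);
        }
      }
    }

runHarvestHttpFileAsync could then open the stream via validate(url).toURL().openStream() so that only vetted schemes and hosts are fetched.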
@@ -18,20 +18,22 @@ public interface HarvestService {
    * @param oaiHarvestData The object that encapsulates the necessary data for harvesting
    * @param stepSize The step size to apply in the record selection
    */
-  void harvestOaiPmh(String datasetId, Record.RecordBuilder recordDataEncapsulated, OaiHarvestData oaiHarvestData,
-      Integer stepSize);
+  void harvestFromOaiPmh(String datasetId, Record.RecordBuilder recordDataEncapsulated,
+      OaiHarvestData oaiHarvestData, Integer stepSize);

   /**
    * Harvest the input stream {@link InputStream} with the given datasetId and data of the records
    *
-   * @param inputStream The input stream to harvest from
+   * @param inputStream The input stream to harvest from - the caller is responsible
+   *     for closing the stream after it has been consumed.
    * @param datasetId The id of the dataset to be harvested
    * @param recordDataEncapsulated The encapsulation of data to be used to harvest each record
    * @param stepSize The step size to apply in the record selection
    * @param compressedFileExtension The content type of the file being uploaded
    * @throws HarvesterException In case an issue occurs while harvesting
    */
-  void harvest(InputStream inputStream, String datasetId, Record.RecordBuilder recordDataEncapsulated, Integer stepSize,
-      CompressedFileExtension compressedFileExtension) throws HarvesterException;
+  void harvestFromCompressedArchive(InputStream inputStream, String datasetId,
+      Record.RecordBuilder recordDataEncapsulated, Integer stepSize,
+      CompressedFileExtension compressedFileExtension) throws HarvesterException;

 }
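
Given the documented contract that the caller closes the stream, a synchronous caller of harvestFromCompressedArchive can rely on try-with-resources. A minimal sketch; the file path and the other arguments are assumed to be in scope:

    import java.io.InputStream;
    import java.nio.file.Files;
    import java.nio.file.Path;

    // Hypothetical synchronous caller; harvestService and the arguments are assumed bindings.
    try (InputStream inputStream = Files.newInputStream(Path.of("records.zip"))) {
      harvestService.harvestFromCompressedArchive(inputStream, datasetId,
          recordDataEncapsulated, stepSize, compressedFileExtension);
    } // the stream is closed here even if HarvesterException is thrown

The asynchronous path in the implementation above cannot use try-with-resources at the call site, which is why this PR moves inputStreamToHarvest.close() into a finally block inside the async task.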