
Commit

Finished implementation
janvanmansum committed Oct 25, 2024
1 parent 550fb34 commit acaf755
Showing 8 changed files with 357 additions and 40 deletions.
7 changes: 6 additions & 1 deletion src/main/java/nl/knaw/dans/dvcli/action/BatchProcessor.java
@@ -35,7 +35,7 @@
@Slf4j
public class BatchProcessor<I, R> {
/**
* The labeled items to process.
* The labeled items to process. The String is the label; <code>I</code> is the item type.
*/
@NonNull
private final Stream<Pair<String, I>> labeledItems;
@@ -69,6 +69,11 @@ public BatchProcessorBuilder<I, R> labeledItems(Collection<Pair<String, I>> item
this.numberOfItems = (long) items.size();
return this;
}

public BatchProcessorBuilder<I, R> labeledItems(Stream<Pair<String, I>> items) {
this.labeledItems = items;
return this;
}
}

public void process() {
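As a usage sketch of the new Stream-based overload (labels and items are made up; this assumes BatchProcessor exposes a Lombok-style builder() factory and that action(...) accepts a ThrowingFunction<I, R, Exception>, as the DeleteMetadata changes below suggest):

// Hypothetical usage; imports as in the files in this commit.
BatchProcessor.<String, String> builder()
    .labeledItems(Stream.of(new Pair<>("dataset-1", "doi:10.5072/ds-1")))
    .action(pid -> "Processed " + pid) // ThrowingFunction<I, R, Exception>
    .build()
    .process();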
AbstractSubcommandContainer.java
@@ -29,6 +29,10 @@
import java.io.IOException;
import java.util.List;

/**
 * Base class for commands that group subcommands operating on a collection of items.
 *
 * @param <T> the type of item that the subcommands operate on
 */
public abstract class AbstractSubcommandContainer<T> extends AbstractCmd {
private static final long DEFAULT_DELAY = 1000;

src/main/java/nl/knaw/dans/dvcli/command/dataset/DeleteMetadata.java
@@ -15,17 +15,26 @@
*/
package nl.knaw.dans.dvcli.command.dataset;

import lombok.Value;
import nl.knaw.dans.dvcli.action.Pair;
import nl.knaw.dans.dvcli.action.ThrowingFunction;
import nl.knaw.dans.dvcli.command.AbstractCmd;
import nl.knaw.dans.dvcli.inputparsers.FieldValuesParamsFileParser;
import nl.knaw.dans.dvcli.inputparsers.FieldValuesParser;
import nl.knaw.dans.lib.dataverse.DatasetApi;
import nl.knaw.dans.lib.dataverse.model.dataset.FieldList;
import nl.knaw.dans.lib.dataverse.model.dataset.MetadataField;
import picocli.CommandLine.ArgGroup;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;
import picocli.CommandLine.ParentCommand;

import java.nio.file.Path;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import java.util.stream.Stream;

@Command(name = "delete-metadata",
mixinStandardHelpOptions = true,
@@ -54,12 +63,52 @@ static class FieldValueOrParameterFile {
@ArgGroup(multiplicity = "1")
private FieldValueOrParameterFile fieldValueOrParameterFile;

private static class DeleteMetadataAction implements ThrowingFunction<DeleteMetadataParams, String, Exception> {
@Override
public String apply(DeleteMetadataParams deleteMetadataParams) throws Exception {
var fieldList = new FieldList(deleteMetadataParams.fieldValues.stream().toList());
deleteMetadataParams.api.deleteMetadata(fieldList, Collections.emptyMap());
return "Delete metadata";
}
}

@Value
private static class DeleteMetadataParams {
DatasetApi api;
Set<MetadataField> fieldValues;
}

@Override
public void doCall() throws Exception {
var metadataFields = new FieldValuesParser(fieldValueOrParameterFile.fieldValues).parse();
datasetCmd.batchProcessor(d -> {
d.deleteMetadata(new FieldList(metadataFields), Collections.emptyMap());
return "Delete metadata";
}).process();
datasetCmd.<DeleteMetadataParams> paramsBatchProcessorBuilder()
.labeledItems(getLabeledItems())
.action(new DeleteMetadataAction())
.build()
.process();
}

private Stream<Pair<String, DeleteMetadataParams>> getLabeledItems() {
try {
if (fieldValueOrParameterFile.fieldValues != null) {
var keyValues = new HashMap<String, String>();
for (var fieldValue : fieldValueOrParameterFile.fieldValues) {
var split = fieldValue.split("=", 2);
keyValues.put(split[0], split[1]);
}
return datasetCmd.getItems().stream()
.map(p -> new Pair<>(p.getFirst(), new FieldValuesParser(keyValues).parse()))
.map(p -> new Pair<>(p.getFirst(), new DeleteMetadataParams(datasetCmd.getDataverseClient().dataset(p.getFirst()), p.getSecond())));

}
else if (fieldValueOrParameterFile.parametersFile != null) {
return new FieldValuesParamsFileParser(fieldValueOrParameterFile.parametersFile)
.parse()
.map(p -> new Pair<>(p.getFirst(), new DeleteMetadataParams(datasetCmd.getDataverseClient().dataset(p.getFirst()), p.getSecond())));
}
}
catch (Exception e) {
throw new RuntimeException("Error parsing field values or parameter file.", e);
}
throw new IllegalArgumentException("No field values or parameter file specified.");
}
}
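For illustration, a parameters file for delete-metadata might look like this (PIDs and field names are made up; given the FieldValuesParser changes below, blank cells are skipped rather than turned into empty field values):

PID,subject*,author*.authorName
doi:10.5072/dans-2a3-4b5,Chemistry,
doi:10.5072/dans-xyz-987,,Doe J.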
45 changes: 45 additions & 0 deletions src/main/java/nl/knaw/dans/dvcli/inputparsers/CsvStream.java
@@ -0,0 +1,45 @@
/*
* Copyright (C) 2024 DANS - Data Archiving and Networked Services ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.dvcli.inputparsers;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

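/**
 * Streams the records of a CSV file; the underlying parser is closed when the returned stream is closed.
 */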
public class CsvStream {
private final Path csvFile;

public CsvStream(Path csvFile) {
this.csvFile = csvFile;
}

public Stream<CSVRecord> stream() throws IOException {
CSVParser parser = CSVParser.parse(csvFile, StandardCharsets.UTF_8, CSVFormat.DEFAULT.builder().setSkipHeaderRecord(true).build());
return StreamSupport.stream(parser.spliterator(), false).onClose(() -> {
try {
parser.close();
} catch (IOException e) {
e.printStackTrace();
}
});
}
}
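Usage sketch (file name hypothetical): since the parser is only closed through the stream's onClose hook, callers should open the stream in try-with-resources:

// Hypothetical usage; imports as in CsvStream above.
try (Stream<CSVRecord> records = new CsvStream(Path.of("input.csv")).stream()) {
    records.forEach(record -> System.out.println(record.get(0)));
}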
src/main/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParamsFileParser.java
@@ -0,0 +1,92 @@
/*
* Copyright (C) 2024 DANS - Data Archiving and Networked Services ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.dvcli.inputparsers;

import lombok.AllArgsConstructor;
import nl.knaw.dans.dvcli.action.Pair;
import nl.knaw.dans.lib.dataverse.model.dataset.MetadataField;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

@AllArgsConstructor
public class FieldValuesParamsFileParser {
private final Path csvFile;

/**
 * Parses the parameters file and returns a stream of pairs of PID and a set of MetadataFields. The client is expected to process the set of MetadataFields for the given PID. Note that the
 * MetadataField class actually represents a metadata field value, not merely a field definition (although it does carry definition aspects, such as repeatability).
 *
 * The parameters file must have the following format:
 *
 * <pre>
 * PID,field1,parentField1*.subfieldA,parentField1*.subfieldB <-- the header
 * doi:10.5072/dans-2a3-4b5,foo,bar,baz <-- a row
 * doi:10.5072/dans-2a3-4b5,foo,bar,baz <-- another row
 * </pre>
 *
 * The asterisk (*) indicates that the field is multi-valued, i.e. repeatable.
 *
 * @return a stream of pairs of PID and a set of MetadataFields
 */
public Stream<Pair<String, Set<MetadataField>>> parse() {
try {
CSVParser parser = CSVParser.parse(csvFile, StandardCharsets.UTF_8,
CSVFormat.DEFAULT.builder()
.setHeader()
.setSkipHeaderRecord(true).build());
return StreamSupport.stream(parser.spliterator(), false).onClose(() -> {
try {
parser.close();
}
catch (IOException e) {
throw new RuntimeException(e);
}
}).map(record -> parseRecord(record, new HashSet<>(parser.getHeaderMap().keySet())));
}
catch (IOException e) {
throw new RuntimeException(e);
}
}

private Pair<String, Set<MetadataField>> parseRecord(CSVRecord record, Set<String> headers) {
String pid = record.get("PID");
if (pid == null || pid.isBlank()) {
throw new IllegalArgumentException("PID is missing in the parameters file");
}

Map<String, String> keyValues = new HashMap<>();
for (String header : headers) {
if (header.equals("PID")) {
continue;
}
keyValues.put(header, record.get(header));
}

return new Pair<>(pid, new FieldValuesParser(keyValues).parse());
}
}
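A usage sketch (file name hypothetical; Pair's getFirst/getSecond accessors are those used elsewhere in this commit). The returned stream should be closed so the underlying CSV parser is released:

// Hypothetical usage; imports as in the file above.
try (var pairs = new FieldValuesParamsFileParser(Path.of("params.csv")).parse()) {
    pairs.forEach(p -> System.out.println(p.getFirst() + ": " + p.getSecond().size() + " field value(s)"));
}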
src/main/java/nl/knaw/dans/dvcli/inputparsers/FieldValuesParser.java
@@ -20,22 +20,22 @@
import nl.knaw.dans.lib.dataverse.model.dataset.MetadataField;
import nl.knaw.dans.lib.dataverse.model.dataset.PrimitiveMultiValueField;
import nl.knaw.dans.lib.dataverse.model.dataset.PrimitiveSingleValueField;
import org.apache.commons.lang3.StringUtils;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

@AllArgsConstructor
public class FieldValuesParser {
private final List<String> values;
private final Map<String, String> keyValues;

public List<MetadataField> parse() {
Map<String, String> keyValues = new HashMap<>();

for (var value : values) {
String[] split = value.split("=", 2);
keyValues.put(checkValidName(split[0]), split[1]);
public Set<MetadataField> parse() {
for (var key : keyValues.keySet()) {
checkValidName(key);
}

Map<String, Map<String, String>> compoundFields = new HashMap<>();
Expand All @@ -56,42 +56,55 @@ public List<MetadataField> parse() {
keyValues.remove(key);
}

List<MetadataField> result = new ArrayList<>();
Set<MetadataField> result = new HashSet<>();

for (var key : keyValues.keySet()) {
if (key.endsWith("*")) {
result.add(new PrimitiveMultiValueField(key.substring(0, key.length() - 1), List.of(keyValues.get(key))));
}
else {
result.add(new PrimitiveSingleValueField(key, keyValues.get(key)));
if (StringUtils.isNotBlank(keyValues.get(key))) {
if (key.endsWith("*")) {
result.add(new PrimitiveMultiValueField(key.substring(0, key.length() - 1), List.of(keyValues.get(key))));
}
else {
result.add(new PrimitiveSingleValueField(key, keyValues.get(key)));
}
}
}

for (var parent : compoundFields.keySet()) {
Map<String, String> subfields = compoundFields.get(parent);
if (parent.endsWith("*")) {
var builder = new CompoundFieldBuilder(parent.substring(0, parent.length() - 1), true);
boolean hasValues = false;
for (var subfield : subfields.keySet()) {
builder.addSubfield(subfield, subfields.get(subfield));
if (StringUtils.isNotBlank(subfields.get(subfield))) {
builder.addSubfield(subfield, subfields.get(subfield));
hasValues = true;
}
}
if (hasValues) {
result.add(builder.build());
}
result.add(builder.build());
}
else {
var builder = new CompoundFieldBuilder(parent, false);
boolean hasValues = false;
for (var subfield : subfields.keySet()) {
builder.addSubfield(subfield, subfields.get(subfield));
if (StringUtils.isNotBlank(subfields.get(subfield))) {
builder.addSubfield(subfield, subfields.get(subfield));
hasValues = true;
}
}
if (hasValues) {
result.add(builder.build());
}
result.add(builder.build());
}
}

return result;
}

private String checkValidName(String name) {
private void checkValidName(String name) {
if (!name.matches("[a-zA-Z0-9]+\\*?(\\.[a-zA-Z0-9]+)?")) {
throw new IllegalArgumentException("Invalid field name: " + name);
}
return name;
}
}
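Note that parse() mutates the supplied map (compound keys are removed as they are grouped into compoundFields), so callers must pass a mutable map. A sketch with made-up field names:

// Hypothetical usage; imports as in the file above.
var keyValues = new HashMap<String, String>();
keyValues.put("title", "A test dataset");      // becomes a PrimitiveSingleValueField
keyValues.put("author*.authorName", "Doe J."); // subfield of a repeatable compound field
Set<MetadataField> fields = new FieldValuesParser(keyValues).parse();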
(2 of the 8 changed files not shown)
