Skip to content
This repository has been archived by the owner on Sep 6, 2023. It is now read-only.

Commit

Permalink
Add option to apply filters to the handles to be transformed
Browse files Browse the repository at this point in the history
  • Loading branch information
abelgomez committed Mar 28, 2016
1 parent 86476e9 commit f6f85cb
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 59 deletions.
117 changes: 65 additions & 52 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,52 +1,65 @@
# Sistedes Digital Library Tools

Tools for the Sistedes digital library.

## es.sistedes.handle.generator

Generates a list of handle value lines to register the handles for the wordpress entries in the [Sistedes Digital Library](http://biblioteca.sistedes.es).

### Downloads

Find the releases at https://github.com/abelgomez/sistedes.biblioteca/releases

To run the tool, only the `generator.jar` file is needed.

### Command line options:

```
usage: java -jar <this-file.jar> -p <prefix> [-i <input file>] [-o <output file>] [-g] [-d]
-p,--prefix <prefix> Handle server's prefix (mandatory)
-i,--input <input file> The input file (optional, stdin will be used if no input file is specified)
-o,--output <output file> The output file (optional, stdout will be used if no output file is specified)
-g,--guid Use the guid tag instead of the link
-d,--add-delete Add delete statements before the creation
```

### How to use this tool

1. Step 1: Export the wordpress articles using the administrator GUI:

![Exporting articles](doc/export.png)

1. Step 2: Launch the generator:

```
$ java -jar generator.jar -p 11705 -i export.xml -o result.txt
$ cat result.txt
CREATE 11705/JISBD/2015/009
100 HS_ADMIN 86400 1110 ADMIN 300:111111111111:0.NA/11705
1 URL 86400 1110 UTF8 http://biblioteca.sistedes.es/articulo/un-indice-espacio-temporal-compacto-para-consultas-time-slice-y-time-interval/
```
It is also possible to run the command without the `-i` and `-o` arguments, and stdin and stdout will be used instead. This feature is especially useful when using the tool within shell scripts:
```
$ cat export.xml | java -jar generator.jar -p 11705
CREATE 11705/JISBD/2015/009
100 HS_ADMIN 86400 1110 ADMIN 300:111111111111:0.NA/11705
1 URL 86400 1110 UTF8 http://biblioteca.sistedes.es/articulo/un-indice-espacio-temporal-compacto-para-consultas-time-slice-y-time-interval/

```
# Sistedes Digital Library Tools

Tools for the Sistedes digital library.

## es.sistedes.handle.generator

Generates a list of handle value lines to register the handles for the wordpress entries in the [Sistedes Digital Library](http://biblioteca.sistedes.es).

### Downloads

Find the releases at https://github.com/abelgomez/sistedes.biblioteca/releases

To run the tool, only the `generator.jar` file is needed.

### Command line options:

```
usage: java -jar <this-file.jar> -p <prefix> [-i <input file>] [-o <output file>] [-g] [-d] [-f <filter>]
-p,--prefix <prefix> Handle server's prefix (mandatory)
-i,--input <input file> The input file (optional, stdin will be used if no input file is specified)
-o,--output <output file> The output file (optional, stdout will be used if no output file is specified)
-g,--guid Use the guid tag instead of the link
-d,--add-delete Add delete statements before the creation
-f,--filter <filter> Regular expression that the handles of the elements to be transformed must match
```

### How to use this tool

1. Step 1: Export the wordpress articles using the administrator GUI:

![Exporting articles](doc/export.png)

1. Step 2: Launch the generator:

```
$ java -jar generator.jar -p 11705 -i export.xml -o result.txt
$ cat result.txt
CREATE 11705/JISBD/2015/009
100 HS_ADMIN 86400 1110 ADMIN 300:111111111111:0.NA/11705
1 URL 86400 1110 UTF8 http://biblioteca.sistedes.es/articulo/un-indice-espacio-temporal-compacto-para-consultas-time-slice-y-time-interval/
```
It is also possible to run the command without the `-i` and `-o` arguments, and stdin and stdout will be used instead. This feature is especially useful when using the tool within shell scripts:
```
$ cat export.xml | java -jar generator.jar -p 11705
CREATE 11705/JISBD/2015/009
100 HS_ADMIN 86400 1110 ADMIN 300:111111111111:0.NA/11705
1 URL 86400 1110 UTF8 http://biblioteca.sistedes.es/articulo/un-indice-espacio-temporal-compacto-para-consultas-time-slice-y-time-interval/

```
An example using the regular expression filters could be:
```
$ java -jar generator.jar -p 11705 -i export.xml -f this-filter-does-not-match-anything
$ java -jar generator.jar -p 11705 -i export.xml -f 11705/JISBD/.*
CREATE 11705/JISBD/2015/009
100 HS_ADMIN 86400 1110 ADMIN 300:111111111111:0.NA/11705
1 URL 86400 1110 UTF8 http://biblioteca.sistedes.es/articulo/un-indice-espacio-temporal-compacto-para-consultas-time-slice-y-time-interval/
```
1 change: 1 addition & 0 deletions es.sistedes.handle.generator/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
/bin/
/example.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.util.Comparator;
import java.util.logging.Level;
Expand Down Expand Up @@ -41,7 +42,9 @@ public class CliLauncher {
private static final String USE_GUID_LONG = "guid";
private static final String ADD_DELETE = "d";
private static final String ADD_DELETE_LONG = "add-delete";

private static final String FILTER = "f";
private static final String FILTER_LONG = "filter";

private static final Options options = new Options();

static {
Expand Down Expand Up @@ -95,15 +98,18 @@ private static void run(String[] args) throws Exception {

conversor.putOption(ConversorOptions.USE_GUID, commandLine.hasOption(USE_GUID));
conversor.putOption(ConversorOptions.ADD_DELETE, commandLine.hasOption(ADD_DELETE));
if (commandLine.hasOption(FILTER)) {
conversor.putOption(ConversorOptions.FILTER, commandLine.getOptionValue(FILTER));
}

try {
conversor.generate();
} finally {
IOUtils.closeQuietly(input);
IOUtils.closeQuietly(output);
}
} catch (ConversionException e) {
printError(e.getLocalizedMessage());
} catch (ConversionException | FileNotFoundException e) {
printError("ERROR: " + e.getLocalizedMessage());
throw e;
}
}
Expand Down Expand Up @@ -152,11 +158,16 @@ private static void configureOptions(Options options) {
Option deleteOpt = Option.builder(ADD_DELETE).longOpt(ADD_DELETE_LONG)
.desc("Add delete statements before the creation").build();

Option filterOpt = Option.builder(FILTER).longOpt(FILTER_LONG).argName("filter")
.desc("Regular expression that the handles of the elements to be transformed must match").numberOfArgs(1)
.build();

options.addOption(prefixOpt);
options.addOption(inputOpt);
options.addOption(outputOpt);
options.addOption(guidOpt);
options.addOption(deleteOpt);
options.addOption(filterOpt);
}

/**
Expand All @@ -167,7 +178,7 @@ private static void configureOptions(Options options) {
* @param <T>
*/
private static class OptionComarator<T extends Option> implements Comparator<T> {
private static final String OPTS_ORDER = "piogdq";
private static final String OPTS_ORDER = "piogdf";

@Override
public int compare(T o1, T o2) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Pattern;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
Expand Down Expand Up @@ -139,12 +140,21 @@ public synchronized void generate() throws ConversionException {

for (int i = 0; i < list.getLength(); i++) {
Node node = list.item(i);
String handle = xpath.evaluate("postmeta[meta_key/text()='handle']/meta_value/text()", node);
if (filter() != null) {
// We use a regex in the Node instead of a XPath filter in
// the NodeList because Java only supports XPath 1 and the
// "matches" function has been introduced in XPath 2
Pattern pattern = Pattern.compile(filter());
if (!pattern.matcher(handle).matches()) {
continue;
}
}
vars.put(HandleVariables.handle.toString(), handle);
vars.put(HandleVariables.url.toString(),
useGuid ?
xpath.evaluate("guid/text()", node) :
xpath.evaluate("link/text()", node));
vars.put(HandleVariables.handle.toString(),
xpath.evaluate("postmeta[meta_key/text()='handle']/meta_value/text()", node));

if (addDelete) {
outputWriter.println(StrSubstitutor.replace(commands.get("command.delete"), vars));
Expand Down Expand Up @@ -178,6 +188,16 @@ private Boolean addDelete() {
/**
 * Returns the {@link ConversorOptions#USE_GUID} option, or
 * <code>false</code> if the option has not been set
 *
 * @return The stored option value, or <code>false</code> when absent
 */
private Boolean useGuid() {
    final Object configured = options.get(ConversorOptions.USE_GUID);
    if (configured == null) {
        return false;
    }
    return (Boolean) configured;
}

/**
 * Looks up the value stored under {@link ConversorOptions#FILTER}.
 *
 * @return The configured filter, or <code>null</code> when no filter has
 *         been specified
 */
private String filter() {
    final Object configured = options.get(ConversorOptions.FILTER);
    return (String) configured;
}

/**
* Loads the properties file containing the command strings
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
public enum ConversorOptions {

USE_GUID,
ADD_DELETE
ADD_DELETE,
FILTER

}

0 comments on commit f6f85cb

Please sign in to comment.