Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MET-5837 metis dataset cleaner application #118

Open
wants to merge 11 commits into
base: develop
Choose a base branch
from
3 changes: 3 additions & 0 deletions metis-dataset-cleaner/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## Ignore local property files so that they are not committed by mistake
/src/main/resources/*.properties

3 changes: 3 additions & 0 deletions metis-dataset-cleaner/README.MD
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
**Tool to clean a dataset by ID on demand**
This project contains functionality to remove a dataset by its ID.
The tool is required to avoid automatically triggering index generation, because that could block read operations on a live environment.
123 changes: 123 additions & 0 deletions metis-dataset-cleaner/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>eu.europeana.metis</groupId>
<artifactId>metis-dataset-cleaner</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>metis-dataset-cleaner</name>

<!-- Versions shared by the dependency and plugin declarations in this POM. -->
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<version.maven.compiler.plugin>3.8.1</version.maven.compiler.plugin>
<version.spring.boot>3.2.3</version.spring.boot>
<version.metis>12-SNAPSHOT</version.metis>
<version.corelib>2.16.8</version.corelib>
<!-- These two versions are interdependent. -->
<!-- NOTE(review): version.slf4j is not referenced anywhere else in this POM; confirm whether it is still needed. -->
<version.slf4j>1.7.30</version.slf4j>
<version.log4j>2.17.1</version.log4j>
</properties>

<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-log4j2</artifactId>
</dependency>
<!-- Explicitly versioned Log4j 2 binding for the SLF4J API. -->
<!-- NOTE(review): spring-boot-starter-log4j2 (declared above) presumably already supplies an SLF4J binding
     matching the Spring Boot managed SLF4J version; confirm that this extra binding, pinned to
     ${version.log4j}, is still required and does not conflict with it. -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>${version.log4j}</version>
</dependency>
<dependency>
<groupId>eu.europeana.metis</groupId>
<artifactId>metis-core-service</artifactId>
<version>${version.metis}</version>
</dependency>
<dependency>
<groupId>eu.europeana.metis</groupId>
<artifactId>metis-indexing</artifactId>
<version>${version.metis}</version>
</dependency>
<dependency>
<groupId>eu.europeana.corelib</groupId>
<artifactId>corelib-storage</artifactId>
<version>${version.corelib}</version>
</dependency>
</dependencies>

<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-dependencies</artifactId>
<version>${version.spring.boot}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>${version.maven.compiler.plugin}</version>
<configuration>
<release>21</release>
</configuration>
</plugin>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<version>${version.spring.boot}</version>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>

<repositories>
<!--Release repositories-->
<repository>
<id>libs-release</id>
<name>libs-release</name>
<url>https://artifactory.eanadev.org/artifactory/libs-release</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>libs-snapshot</id>
<name>libs-snapshot</name>
<url>https://artifactory.eanadev.org/artifactory/libs-snapshot</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
</repositories>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package eu.europeana.metis.cleaner;

import eu.europeana.metis.cleaner.common.PropertyFileLoader;
import eu.europeana.metis.cleaner.utilities.IndexWrapper;
import java.util.Properties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Bootstraps the cleaner: reads {@code application.properties} through {@link PropertyFileLoader}
 * and obtains the {@link IndexWrapper} instance for the loaded properties.
 */
public class ApplicationInitializer {

  private static final Logger LOGGER = LoggerFactory.getLogger(ApplicationInitializer.class);

  /** Name of the property file handed to the property file loader. */
  private static final String PROPERTIES_FILE = "application.properties";

  /** Class-level properties holder; it is (re)filled every time an instance is constructed. */
  private static final Properties LOADED_PROPERTIES = new Properties();

  private final IndexWrapper indexWrapper;

  /**
   * Loads the property file into the shared properties object and requests the index wrapper
   * for those properties.
   */
  public ApplicationInitializer() {
    PropertyFileLoader.loadPropertyFile("", PROPERTIES_FILE, LOADED_PROPERTIES);
    LOGGER.info("Indexing properties loaded.");
    this.indexWrapper = IndexWrapper.getInstance(LOADED_PROPERTIES);
  }

  /**
   * @return the index wrapper obtained during construction
   */
  public IndexWrapper getIndexWrapper() {
    return this.indexWrapper;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package eu.europeana.metis.cleaner;

import eu.europeana.indexing.IndexingProperties;
import eu.europeana.metis.cleaner.common.TargetIndexingDatabase;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

/**
 * Command-line Spring Boot application that removes records/datasets from, or indexes record
 * files into, both the preview and the publish indexing targets.
 *
 * <p>The following options are recognised. Every option that is present is processed, in this
 * order:
 * <ul>
 *   <li>{@code --record.id=#}: removes the record from preview and publish.</li>
 *   <li>{@code --dataset.id=#}: calls {@code removeAll} for the dataset on preview and publish,
 *   passing the current time as the date argument.</li>
 *   <li>{@code --index.file=X}: reads file X as UTF-8 and indexes it in preview and publish.</li>
 *   <li>{@code --index.files=Y,Z}: reads every listed file as UTF-8 and indexes them as one list
 *   in preview and publish.</li>
 * </ul>
 *
 * <p>Only the first value of each option is used. An option that is present without a value, or
 * with a blank value, is rejected with an {@link IllegalArgumentException}.
 */
@SpringBootApplication
public class MetisDatasetCleaner implements ApplicationRunner {

  private static final Logger LOGGER = LoggerFactory.getLogger(MetisDatasetCleaner.class);

  private static final String RECORD_ID_OPTION = "record.id";
  private static final String DATASET_ID_OPTION = "dataset.id";
  private static final String INDEX_FILE_OPTION = "index.file";
  private static final String INDEX_FILES_OPTION = "index.files";

  /**
   * Application entry point; hands control to Spring Boot, which in turn invokes
   * {@link #run(ApplicationArguments)}.
   *
   * @param args the raw command-line arguments
   */
  public static void main(String[] args) {
    SpringApplication.run(MetisDatasetCleaner.class, args);
  }

  /**
   * Executes the actions requested through the command-line options and then terminates the JVM
   * with exit code 0.
   *
   * @param args the parsed command-line arguments
   * @throws IllegalArgumentException if a recognised option is present without a non-blank value
   * @throws Exception if reading a file or one of the indexer operations fails
   */
  @Override
  public void run(ApplicationArguments args) throws Exception {
    LOGGER.info("Starting cleaning database script");
    ApplicationInitializer applicationInitializer = new ApplicationInitializer();
    logUsage();
    logArguments(args);

    if (args.containsOption(RECORD_ID_OPTION)) {
      LOGGER.info("::Contains record.id::");
      final String recordId = requireOptionValue(args, RECORD_ID_OPTION);
      LOGGER.info("cleaning preview record.id: {}", recordId);
      applicationInitializer.getIndexWrapper().getIndexer(TargetIndexingDatabase.PREVIEW).remove(recordId);
      LOGGER.info("cleaning publish record.id: {}", recordId);
      applicationInitializer.getIndexWrapper().getIndexer(TargetIndexingDatabase.PUBLISH).remove(recordId);
    }

    if (args.containsOption(DATASET_ID_OPTION)) {
      LOGGER.info("::Contains dataset.id::");
      final String datasetId = requireOptionValue(args, DATASET_ID_OPTION);
      LOGGER.info("cleaning preview dataset.id: {}", datasetId);
      applicationInitializer.getIndexWrapper().getIndexer(TargetIndexingDatabase.PREVIEW)
          .removeAll(datasetId, Date.from(Instant.now()));
      LOGGER.info("cleaning publish dataset.id: {}", datasetId);
      applicationInitializer.getIndexWrapper().getIndexer(TargetIndexingDatabase.PUBLISH)
          .removeAll(datasetId, Date.from(Instant.now()));
    }

    if (args.containsOption(INDEX_FILE_OPTION)) {
      LOGGER.info("::Contains index.file::");
      final String fileName = requireOptionValue(args, INDEX_FILE_OPTION);
      // The file is read completely and closed before any indexing starts.
      final String fileData = readFile(fileName);
      IndexingProperties indexingProperties = new IndexingProperties(Date.from(Instant.now()), true, null, true, true);
      LOGGER.info("contents: {}", fileData);
      LOGGER.info("indexing preview file: {}", fileName);
      applicationInitializer.getIndexWrapper().getIndexer(TargetIndexingDatabase.PREVIEW)
          .index(fileData, indexingProperties);
      LOGGER.info("indexing publish file: {}", fileName);
      applicationInitializer.getIndexWrapper().getIndexer(TargetIndexingDatabase.PUBLISH)
          .index(fileData, indexingProperties);
    }

    if (args.containsOption(INDEX_FILES_OPTION)) {
      LOGGER.info("::Contains index.files::");
      final List<String> fileNames = parseFileNames(requireOptionValue(args, INDEX_FILES_OPTION));
      List<String> recordList = new ArrayList<>(fileNames.size());
      for (String fileName : fileNames) {
        final String fileData = readFile(fileName);
        LOGGER.info("contents: {}", fileData);
        recordList.add(fileData);
      }
      IndexingProperties indexingProperties = new IndexingProperties(Date.from(Instant.now()), true, null, true, true);
      LOGGER.info("indexing preview record list");
      applicationInitializer.getIndexWrapper().getIndexer(TargetIndexingDatabase.PREVIEW)
          .index(recordList, indexingProperties);
      LOGGER.info("indexing publish record list");
      applicationInitializer.getIndexWrapper().getIndexer(TargetIndexingDatabase.PUBLISH)
          .index(recordList, indexingProperties);
    }

    LOGGER.info("Finished cleaning database script");
    System.exit(0);
  }

  /** Logs the usage help for the supported command-line options. */
  private static void logUsage() {
    LOGGER.info("Usage: where # is record or dataset id number.");
    LOGGER.info("       where X is path to a rdf xml record ready for preview and publish.");
    LOGGER.info(
        "mvn spring-boot:run -Dspring-boot.run.arguments=\"--record.id=#\" or \"--dataset.id=#\" or --index.file=\"X\" or --index.files=\"Y,Z\"");
    LOGGER.info("java -jar metis-dataset-cleaner-1.0-SNAPSHOT.jar --record.id=# or --dataset.id=#");
    LOGGER.info("java -jar metis-dataset-cleaner-1.0-SNAPSHOT.jar --index.file=\"X\" or --index.files=\"Y,Z\"");
  }

  /** Logs the raw arguments, the non-option arguments and every option with its values. */
  private static void logArguments(ApplicationArguments args) {
    LOGGER.info("Application started with command-line arguments: {}", Arrays.toString(args.getSourceArgs()));
    LOGGER.info("NonOptionArgs: {}", args.getNonOptionArgs());
    LOGGER.info("OptionNames: {}", args.getOptionNames());

    for (String name : args.getOptionNames()) {
      LOGGER.info("arg-{}={}", name, args.getOptionValues(name));
    }
  }

  /**
   * Returns the first value of the given option, failing with a descriptive message instead of a
   * bare {@code NoSuchElementException} when the option was passed without a usable value.
   *
   * @param args the parsed command-line arguments
   * @param optionName the option name, without the leading {@code --}
   * @return the first value of the option, unmodified
   * @throws IllegalArgumentException if the option has no value or its first value is blank
   */
  private static String requireOptionValue(ApplicationArguments args, String optionName) {
    final List<String> values = args.getOptionValues(optionName);
    // "--name" yields an empty list and "--name=" yields an empty string; neither is usable.
    if (values == null || values.isEmpty() || values.getFirst().isBlank()) {
      throw new IllegalArgumentException(
          "Option --" + optionName + " requires a non-blank value, e.g. --" + optionName + "=<value>");
    }
    return values.getFirst();
  }

  /**
   * Splits a comma-separated list of file names, stripping surrounding whitespace from every
   * entry and dropping empty entries, so that {@code "Y, Z"} and {@code "Y,,Z"} both yield Y and Z.
   *
   * @param commaSeparatedNames the raw option value
   * @return the file names in their original order
   */
  private static List<String> parseFileNames(String commaSeparatedNames) {
    final List<String> fileNames = new ArrayList<>();
    for (String candidate : commaSeparatedNames.split(",")) {
      final String fileName = candidate.strip();
      if (!fileName.isEmpty()) {
        fileNames.add(fileName);
      }
    }
    return fileNames;
  }

  /**
   * Reads the whole file and decodes it as UTF-8.
   *
   * @param fileName the path of the file to read
   * @return the file contents
   * @throws IOException if the file cannot be opened or read
   */
  private static String readFile(String fileName) throws IOException {
    try (FileInputStream fileInputStream = new FileInputStream(fileName)) {
      return new String(fileInputStream.readAllBytes(), StandardCharsets.UTF_8);
    }
  }
}
Loading