Skip to content

Commit

Permalink
Merge pull request #906 from NASA-PDS/issue_895
Browse files Browse the repository at this point in the history
Refactored `validate-refs` to use new OpenSearch Serverless and registry-common library
  • Loading branch information
jordanpadams authored Nov 25, 2024
2 parents 40cf6aa + be4d470 commit 63a5026
Show file tree
Hide file tree
Showing 71 changed files with 169 additions and 758 deletions.
18 changes: 4 additions & 14 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,9 @@
{
"path": "detect_secrets.filters.regex.should_exclude_file",
"pattern": [
"\\.secrets..*",
"\\.git.*",
"\\.pre-commit-config\\.yaml",
"\\.secrets..*",
"target"
]
}
Expand Down Expand Up @@ -227,7 +227,7 @@
"filename": "src/site/markdown/operate/index.md",
"hashed_secret": "3a6d7aa49a8e4a2fe32a5cd0e53da9cb96bd8d29",
"is_verified": false,
"line_number": 968,
"line_number": 991,
"is_secret": false
}
],
Expand Down Expand Up @@ -273,7 +273,7 @@
"filename": "src/site/xdoc/operate/errors.xml.vm",
"hashed_secret": "4fb813c304003b3813b35a85f05b7cb0c3994cc1",
"is_verified": false,
"line_number": 173,
"line_number": 179,
"is_secret": false
}
],
Expand Down Expand Up @@ -388,17 +388,7 @@
"line_number": 76,
"is_secret": false
}
],
"src/test/resources/riut/auth.txt": [
{
"type": "Secret Keyword",
"filename": "src/test/resources/riut/auth.txt",
"hashed_secret": "d033e22ae348aeb5660fc2140aec35850c4da997",
"is_verified": false,
"line_number": 3,
"is_secret": false
}
]
},
"generated_at": "2024-05-22T15:27:52Z"
"generated_at": "2024-11-01T18:54:24Z"
}
48 changes: 40 additions & 8 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -367,19 +367,51 @@
<version>5.4.1</version>
</dependency>
<dependency>
<groupId>org.opensearch.client</groupId>
<artifactId>opensearch-rest-client</artifactId>
<version>2.16.0</version>
<groupId>org.mp4parser</groupId>
<artifactId>isoparser</artifactId>
<version>1.9.56</version>
</dependency>
<!-- four (4) artifacts for complete configuration parsing and RI calls -->
<dependency>
<groupId>gov.nasa.pds</groupId>
<artifactId>registry-common</artifactId>
<version>2.1.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>jakarta.xml.bind</groupId>
<artifactId>jakarta.xml.bind-api</artifactId>
<version>4.0.1</version>
</dependency>
<dependency>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-impl</artifactId>
<version>4.0.4</version>
</dependency>
<dependency>
<groupId>jakarta.activation</groupId>
<artifactId>jakarta.activation-api</artifactId>
<version>2.1.2</version>
</dependency>
<!-- four artifacts for opensearch serverless -->
<dependency>
<groupId>org.apache.httpcomponents.client5</groupId>
<artifactId>httpclient5</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.opensearch.client</groupId>
<artifactId>opensearch-rest-high-level-client</artifactId>
<version>2.5.0</version>
<artifactId>opensearch-java</artifactId>
<version>2.13.0</version>
</dependency>
<dependency>
<groupId>org.mp4parser</groupId>
<artifactId>isoparser</artifactId>
<version>1.9.56</version>
<groupId>software.amazon.awssdk</groupId>
<artifactId>opensearch</artifactId>
<version>2.25.31</version>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>apache-client</artifactId>
<version>2.25.31</version>
</dependency>
</dependencies>

Expand Down
125 changes: 32 additions & 93 deletions src/main/java/gov/nasa/pds/validate/ri/AuthInformation.java
Original file line number Diff line number Diff line change
@@ -1,102 +1,41 @@
package gov.nasa.pds.validate.ri;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Properties;
import java.util.Scanner;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.lang.NotImplementedException;
import gov.nasa.pds.registry.common.ConnectionFactory;
import gov.nasa.pds.registry.common.EstablishConnectionFactory;

public class AuthInformation {
final public static AuthInformation NO_AUTH = new AuthInformation(false, "", "", "");
final private boolean trustSelfSigned;
final private String password;
final private String url;
final private String username;

private AuthInformation(boolean tss, String pwd, String un, String url) {
this.password = pwd;
this.trustSelfSigned = tss;
this.url = url;
this.username = un;
final private String apiAuthFile;
final private String osAuthFile;
final private String regConn;
private transient ConnectionFactory factory = null;
private AuthInformation(String a, String A, String r) {
this.apiAuthFile = A;
this.osAuthFile = a;
this.regConn = r;
}

public static AuthInformation buildFrom(String filename)
throws IOException, ParserConfigurationException, SAXException {
boolean tss;
File file = new File(filename);
Scanner textReader;
String line = null, pwd, un, url;

if (filename == null || filename.length() == 0)
return NO_AUTH;
if (!file.exists())
throw new IOException("Filename '" + filename + "' does not exist");

// Get the first non-comment line
textReader = new Scanner(file, Charset.defaultCharset().name());
while (textReader.hasNext() && line == null) {
line = textReader.nextLine().strip();
if (line.charAt(0) == '#')
line = null;
}
textReader.close();

// Determine which file processing to use
if (line.startsWith("<?xml ") && line.endsWith("?>")) { // XML
// <registry url="http://localhost:9200" index="registry" auth="/path/to/auth.cfg" />
DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
Document document = builder.parse(file);
NodeList registries = document.getElementsByTagName("registry");

if (registries.getLength() != 1)
throw new SAXException(
"There should be one and only registry tag in the harvest config file but found "
+ registries.getLength());
if (registries.item(0).getAttributes().getNamedItem("auth") == null)
throw new SAXException("Requires an authorization file or 'auth' attribute on <registry>.");

filename = registries.item(0).getAttributes().getNamedItem("auth").getNodeValue();
url = registries.item(0).getAttributes().getNamedItem("url").getNodeValue();
} else { // java property
FileInputStream input = new FileInputStream(file);
Properties properties = new Properties();
properties.load(input);
url = properties.getProperty("url");
filename = properties.getProperty("credentials");
input.close();
}

// Get credentials
FileInputStream input = new FileInputStream(filename);
Properties properties = new Properties();
properties.load(input);
pwd = properties.getProperty("password");
tss = Boolean.valueOf(properties.getProperty("trust.self-signed", "false"));
un = properties.getProperty("user");
input.close();
return new AuthInformation(tss, pwd, un, url);
}

public String getPassword() {
return password;
}

public boolean getTrustSelfSigned() {
return trustSelfSigned;
public static AuthInformation buildFrom(CommandLine cl) {
return new AuthInformation(
cl.getOptionValue("a",""),
cl.getOptionValue("A",""),
cl.getOptionValue("r",""));
}

public String getUsername() {
return username;
public synchronized ConnectionFactory getConnectionFactory() throws Exception {
if (this.factory == null) {
if (!this.apiAuthFile.isBlank()) {
throw new NotImplementedException();
}
if (!this.osAuthFile.isBlank()) {
this.factory = EstablishConnectionFactory.from(this.regConn, this.osAuthFile);
}
if (this.factory == null) {
throw new IllegalArgumentException("did not supply necessary arguments on the CLI");
}
}
return this.factory;
}

public String getUrl() {
return url;
public String getURL() {
return factory != null ? this.factory.toString() : "uninitialized connection factory";
}
}
73 changes: 28 additions & 45 deletions src/main/java/gov/nasa/pds/validate/ri/CommandLineInterface.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,13 @@ public CommandLineInterface() {
"file with the URL and credential content to have full (all product states) read-only access to the Registry Search API")
.hasArg(true).longOpt("auth-api").numberOfArgs(1).optionalArg(true).build()); */
this.opts.addOption(Option.builder("a").argName("auth-file").desc(
"file with the URL and credential content to have full, direct read-only access to the Registry OpenSearch DB")
"file with the credential content to have full, direct read-only access to the Registry OpenSearch DB")
.hasArg(true).longOpt("auth-opensearch").numberOfArgs(1).optionalArg(true).build());
this.opts.addOption(Option.builder("h").desc("show this text and exit").hasArg(false)
.longOpt("help").optionalArg(true).build());
this.opts.addOption(Option.builder("r").argName("registry-connection").desc(
"URL point to the registry connection information usually of the form app://connection/direct/localhost.xml")
.hasArg(true).longOpt("registry-connection").numberOfArgs(1).optionalArg(true).build());
this.opts.addOption(Option.builder("t").argName("count").desc(
"process the lidvids in parallel (multiple threads) with this argument being the maximum number of threads")
.hasArg(true).longOpt("threads").optionalArg(true).build());
Expand All @@ -55,16 +58,11 @@ public void help() {
"Multiple arguments may be given in any order, for example:\n" +
" > validate-refs urn:nasa:pds:foo::1.0 label.xml urn:nasa:pds:bar::2.0 manifest.txt\n\n",
opts,
"\nAn auth-file is either a text file of the Java property format " +
"with two variables, 'url' and 'credentials': \n\n" +
" - The 'url' property is the complete base URL to the Registry OpenSearch endpoint or Search API\n" +
" * 'https://my-registry.es.amazonaws.com/_search'\n\n" +
" - The 'credentials' is the path to:\n" +
" * Harvest config file containing the necessary Registry OpenSearch authorization\n" +
" <registry url=\"http://localhost:9200\" index=\"registry\" auth=\"/path/to/auth.cfg\" />\n" +
" * Java Properties file with a 'user' and 'password' specified, for example: \n" +
" user=janedoe\n" +
" password=mypassword\n\n",
"\nAn auth-file is a text file of the Java property format " +
"with two variables, 'user' and 'password' for example: \n" +
" user=janedoe\n" +
" password=mypassword\n\n" +
"Both -a and -r are required.\n\n",
true);
}

Expand All @@ -89,17 +87,16 @@ public int process(String[] args)
loggerConfig.setLevel(Level.INFO);
ctx.updateLoggers();

if (!cl.hasOption("a")) {
throw new ParseException("Not yet implemented. Must provide OpenSearch Registry authorization information.");
} else if (!cl.hasOption("A")) {
log.warn("Using Registry OpenSearch Database to check references.");
if (cl.hasOption("A")) {
throw new ParseException("Not yet implemented. Must provide OpenSearch Registry authorization information through -a and -r.");
} else {
/* not true statement until registry handles authentication
* throw new ParseException("Must supply authorization file for access to either OpenSearch Database (auth-opensearch) or OpenSearch Registry (auth-api).");
*/
throw new ParseException("Must define authorization file for access to OpenSearch Database (auth-opensearch).");
boolean both = cl.hasOption("a") && cl.hasOption("r");
if (!both) {
throw new ParseException("Both -a and -r must be given.");
} else {
log.warn("Using Registry OpenSearch Database to check references.");
}
}

if (cl.getArgList().size() < 1)
throw new ParseException("Must provide at least one LIDVID, Label file path, or manifest file path as a starting point.");

Expand All @@ -115,31 +112,17 @@ public int process(String[] args)
} else
this.log.info("lidvids will be sequentially processed.");

try {
DuplicateFileAreaFilenames scanner = new DuplicateFileAreaFilenames(
AuthInformation.buildFrom(cl.getOptionValue("auth-api", "")),
AuthInformation.buildFrom(cl.getOptionValue("auth-opensearch", "")));
Engine engine = new Engine(cylinders, UserInput.toLidvids (cl.getArgList()),
AuthInformation.buildFrom(cl.getOptionValue("auth-api", "")),
AuthInformation.buildFrom(cl.getOptionValue("auth-opensearch", "")));
this.log.info("Starting the duplicate filename in FileArea checks.");
scanner.findDuplicatesInBackground();
this.log.info("Starting the reference integrity checks.");
engine.processQueueUntilEmpty();
scanner.waitTillDone();
this.broken = engine.getBroken();
this.duplicates = scanner.getResults();
this.total = engine.getTotal();
} catch (IOException e) {
this.log.fatal("Cannot process request because of IO problem.", e);
throw e;
} catch (ParserConfigurationException e) {
this.log.fatal("Could not parse the harvest configuration file.", e);
throw e;
} catch (SAXException e) {
this.log.fatal("Mal-formed harvest configuration file.", e);
throw e;
}
DuplicateFileAreaFilenames scanner = new DuplicateFileAreaFilenames(AuthInformation.buildFrom(cl));
Engine engine = new Engine(cylinders, UserInput.toLidvids (cl.getArgList()), AuthInformation.buildFrom(cl));
this.log.info("Starting the duplicate filename in FileArea checks.");
scanner.findDuplicatesInBackground();
this.log.info("Starting the reference integrity checks.");
engine.processQueueUntilEmpty();
scanner.waitTillDone();
this.broken = engine.getBroken();
this.duplicates = scanner.getResults();
this.total = engine.getTotal();

if (-1 < this.total) {
this.log.info("Reference Summary:");
this.log.info(" " + this.total + " products processed");
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/gov/nasa/pds/validate/ri/CountingAppender.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package gov.nasa.pds.validate.ri;

import java.io.Serializable;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.core.Appender;
import org.apache.logging.log4j.core.ErrorHandler;
Expand All @@ -11,7 +12,7 @@ class CountingAppender implements Appender {
private int err = 0, fatal = 0, warn = 0;
private ErrorHandler handler = null;
private Filter filter = null;
private Layout layout = null;
private Layout<Serializable> layout = null;
private String name = "";

public void addFilter(Filter newFilter) {
Expand Down Expand Up @@ -54,7 +55,7 @@ public ErrorHandler getHandler() {
}

@Override
public Layout getLayout() {
public Layout<Serializable> getLayout() {
return this.layout;
}

Expand Down
Loading

0 comments on commit 63a5026

Please sign in to comment.