Skip to content

Commit

Permalink
update the example to include schema
Browse files Browse the repository at this point in the history
  • Loading branch information
Al Niessner authored and Al Niessner committed Apr 19, 2024
1 parent ca3e0c6 commit 27e5ed7
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 11 deletions.
5 changes: 3 additions & 2 deletions src/main/java/gov/nasa/pds/harvest/dao/MetadataWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public class MetadataWriter implements Closeable
private final static String WARN_SKIP_PRE = "Skipping registered product ";
private final static String WARN_SKIP_POST = " (LIDVID/LID already exists in registry database)";
private final static int ES_DOC_BATCH_SIZE = 50;

private final ConnectionFactory conFact;
private Logger log;

private RegistryDao registryDao;
Expand All @@ -42,6 +42,7 @@ public class MetadataWriter implements Closeable
*/
public MetadataWriter(ConnectionFactory conFact, RegistryDao dao, Counter counter) throws Exception
{
this.conFact = conFact;
log = LogManager.getLogger(this.getClass());
loader = new DataLoader(conFact);
docBatch = new RegistryDocBatch();
Expand All @@ -60,7 +61,7 @@ public void setOverwriteExisting(boolean b)

public void write(Metadata meta) throws Exception
{
docBatch.write(meta, jobId);
docBatch.write(this.conFact, meta, jobId);

if(docBatch.size() % ES_DOC_BATCH_SIZE == 0)
{
Expand Down
34 changes: 29 additions & 5 deletions src/main/java/gov/nasa/pds/harvest/dao/RegistryDocBatch.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
package gov.nasa.pds.harvest.dao;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import gov.nasa.pds.registry.common.ConnectionFactory;
import gov.nasa.pds.registry.common.RestClient;
import gov.nasa.pds.registry.common.meta.Metadata;
import gov.nasa.pds.registry.common.util.json.RegistryDocBuilder;

Expand All @@ -20,7 +24,8 @@ public static class NJsonItem
public String dataJson; // Data JSON (line 2)

}

final private static HashSet<String> alreadyLearned = new HashSet<String>();
final private Logger log = LogManager.getLogger(RegistryDocBatch.class);
private List<NJsonItem> items;


Expand All @@ -32,15 +37,34 @@ public RegistryDocBatch()
items = new ArrayList<>();
}


public void write(Metadata meta, String jobId) throws Exception
/**
 * Hack for PDS-NASA/harvest#127.
 *
 * Scans the JSON string for every field name that starts with {@code ref_lid_}
 * and, for names not seen before, opens a REST client so the name can be added
 * to the index to make it searchable. The actual index update is still a
 * placeholder (see FIXME below). Every name found is recorded in
 * {@code alreadyLearned} so it is only attempted once.
 *
 * @param conFact factory used to create the REST client for the index update
 * @param json    document JSON to scan; a {@code null} value is ignored
 */
private void updateIndex(ConnectionFactory conFact, String json) {
  final String prefix = "ref_lid_";
  if (json == null) {
    return;
  }
  int begin = json.indexOf(prefix);
  while (-1 < begin) {
    // The name runs up to the next double quote after the prefix.
    final int end = json.indexOf('"', begin + prefix.length());
    if (end < 0) {
      // No terminating quote: nothing sensible to extract, and substring()
      // would throw, so stop scanning.
      break;
    }
    final String name = json.substring(begin, end);
    log.debug(name);
    // add() returns true only the first time a name is seen.
    if (alreadyLearned.add(name)) {
      try (RestClient client = conFact.createRestClient()) {
        // FIXME: how to add text or keyword to index to make it searchable??
      } catch (Exception e) {
        log.error("Unexpected error (should not have made it here) while updating index with " + name,e);
      }
    }
    // Advance past this occurrence; without this the loop never terminates.
    begin = json.indexOf(prefix, end);
  }
}
/**
 * Builds an NJSON item (primary-key line plus data line) from the given
 * metadata, passes the data JSON through {@code updateIndex}, and appends
 * the item to this batch.
 *
 * @param conFact connection factory handed to {@code updateIndex}
 * @param meta    product metadata supplying the lidvid, product class and JSON content
 * @param jobId   job identifier passed to the data JSON builder
 * @throws Exception if building either JSON line fails
 */
public void write(ConnectionFactory conFact, Metadata meta, String jobId) throws Exception
{
    final NJsonItem entry = new NJsonItem();
    entry.lidvid = meta.lidvid;
    entry.prodClass = meta.prodClass;
    entry.pkJson = RegistryDocBuilder.createPKJson(meta);

    // Keep the data line in a local so the same string is stored and scanned.
    final String dataJson = RegistryDocBuilder.createDataJson(meta, jobId);
    entry.dataJson = dataJson;

    updateIndex(conFact, dataJson);
    items.add(entry);
}

Expand Down
4 changes: 3 additions & 1 deletion src/main/resources/conf/examples/bundles.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
* PSA - Planetary Science Archive
* JAXA - Japan Aerospace Exploration Agency
-->
<harvest>
<harvest
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="https://github.com/NASA-PDS/harvest/blob/main/src/main/resources/conf/configuration.xsd">
<nodeName>CHANGE_ME</nodeName>
<!-- Registry configuration -->
<!-- UPDATE with your registry information -->
Expand Down
4 changes: 3 additions & 1 deletion src/main/resources/conf/examples/directories.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
* PSA - Planetary Science Archive
* JAXA - Japan Aerospace Exploration Agency
-->
<harvest>
<harvest
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="https://github.com/NASA-PDS/harvest/blob/main/src/main/resources/conf/configuration.xsd">
<nodeName>CHANGE_ME</nodeName>

<!-- Registry configuration -->
Expand Down
4 changes: 3 additions & 1 deletion src/main/resources/conf/examples/files.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
* PSA - Planetary Science Archive
* JAXA - Japan Aerospace Exploration Agency
-->
<harvest>
<harvest
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="https://github.com/NASA-PDS/harvest/blob/main/src/main/resources/conf/configuration.xsd">
<nodeName>CHANGE_ME</nodeName>

<!-- Registry configuration -->
Expand Down
4 changes: 3 additions & 1 deletion src/main/resources/conf/examples/xpaths.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>

<harvest>
<harvest
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="https://github.com/NASA-PDS/harvest/blob/main/src/main/resources/conf/configuration.xsd">
<nodeName>CHANGE_ME</nodeName>
<load>
<directories>
Expand Down

0 comments on commit 27e5ed7

Please sign in to comment.