Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bxc 4082 thumbnail solr index #1611

Merged
merged 8 commits into from
Sep 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ private List<String> determineContentStatus(DocumentIndexingPackage dip)
if (parentResc.hasProperty(Cdr.primaryObject, resc)) {
status.add(FacetConstants.IS_PRIMARY_OBJECT);
}
if (parentResc.hasProperty(Cdr.useAsThumbnail, resc)) {
sharonluong marked this conversation as resolved.
Show resolved Hide resolved
status.add(FacetConstants.IS_ASSIGNED_THUMBNAIL);
}
}

return status;
Expand All @@ -70,5 +73,11 @@ private void addWorkObjectStatuses(List<String> status, Resource resource) {
} else {
status.add(FacetConstants.MEMBERS_ARE_UNORDERED);
}

if (resource.hasProperty(Cdr.useAsThumbnail)) {
status.add(FacetConstants.THUMBNAIL_ASSIGNED);
} else {
status.add(FacetConstants.NO_THUMBNAIL_ASSIGNED);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -49,6 +50,7 @@ public class SetDatastreamFilter implements IndexDocumentFilter {

private DerivativeService derivativeService;
private TechnicalMetadataService technicalMetadataService;
/** Datastream types treated as thumbnails when filtering derivatives and when clearing earlier thumbnail entries. */
private static final List<DatastreamType> THUMBNAIL_DS_TYPES = Arrays.asList(DatastreamType.THUMBNAIL_SMALL, DatastreamType.THUMBNAIL_LARGE);

@Override
public void filter(DocumentIndexingPackage dip) throws IndexingException {
Expand All @@ -69,10 +71,14 @@ public void filter(DocumentIndexingPackage dip) throws IndexingException {
doc.setFilesizeSort(getFilesize(datastreams));

// Add list of derivatives associated from the representative file
addDerivatives(datastreams, fileObj.getPid(), ownedByOtherObject);
addDerivatives(datastreams, fileObj.getPid(), ownedByOtherObject, null);
} else {
// Add list of derivatives associated with the object
addDerivatives(datastreams, contentObj.getPid(), false);
addDerivatives(datastreams, contentObj.getPid(), false, null);
}

if (contentObj instanceof WorkObject) {
addThumbnailDerivatives((WorkObject) contentObj, datastreams);
}

// Add in metadata datastreams
Expand Down Expand Up @@ -151,7 +157,7 @@ private String getExtent(List<BinaryObject> binList) {
* @param ownedByOtherObject
*/
private void addDatastreams(List<Datastream> dsList, List<BinaryObject> binList, boolean ownedByOtherObject) {
binList.stream().forEach(binary -> {
binList.forEach(binary -> {
Resource binaryResc = binary.getResource();

String name = binaryResc.getURI();
Expand Down Expand Up @@ -211,30 +217,63 @@ private long getFilesize(List<Datastream> datastreams) throws IndexingException
.filter(ds -> ORIGINAL_FILE.getId().equals(ds.getName()))
.findFirst();

if (!original.isPresent()) {
if (original.isEmpty()) {
throw new IndexingException("File object in invalid state, cannot find original file binary");
}

Long size = original.get().getFilesize();
return size != null ? size : 0l;
}

private void addDerivatives(List<Datastream> dsList, PID pid, boolean ownedByOtherObject) {
derivativeService.getDerivatives(pid).stream()
.forEach(deriv -> {
String owner = (ownedByOtherObject ? pid.getId() : null);
private void addDerivatives(List<Datastream> dsList, PID pid, boolean ownedByOtherObject, List<DatastreamType> types) {
derivativeService.getDerivatives(pid).forEach(deriv -> {
DatastreamType type = deriv.getType();
// only add derivatives of types listed
if ((types != null) && !types.contains(type)) {
return;
}

DatastreamType type = deriv.getType();
String name = type.getId();
String mimetype = type.getMimetype();
String extension = type.getExtension();
String owner = (ownedByOtherObject ? pid.getId() : null);
dsList.add(createDatastream(deriv, owner));
});
}

File derivFile = deriv.getFile();
Long filesize = derivFile.length();
String filename = derivFile.getName();
/**
* Used to selectively add only thumbnail datastreams
*
* @param workObject the work object with the thumbnail relation
* @param datastreams work object's datastreams to add thumbnail streams to
*/
private void addThumbnailDerivatives(WorkObject workObject, List<Datastream> datastreams) {
FileObject thumbnailObject = workObject.getThumbnailObject();

dsList.add(new DatastreamImpl(owner, name, filesize, mimetype, filename, extension, null, null));
});
if (thumbnailObject != null) {
var updatedDatastreams = clearPreviousThumbnailDatastreams(datastreams);
addDerivatives(updatedDatastreams, thumbnailObject.getPid(), true, THUMBNAIL_DS_TYPES);
}
}

/**
* There may be thumbnail streams from the primary object, so we'll clear those
* before adding the assigned thumbnail datastreams
*
* @param datastreams full list of datastreams to index for the work object
* @return modified list of datastreams without thumbnail datastreams
*/
private List<Datastream> clearPreviousThumbnailDatastreams(List<Datastream> datastreams) {
sharonluong marked this conversation as resolved.
Show resolved Hide resolved
datastreams.removeIf(ds -> THUMBNAIL_DS_TYPES.contains(DatastreamType.getByIdentifier(ds.getName())));
return datastreams;
}

/**
 * Builds the datastream entry describing a single derivative file.
 *
 * @param derivative derivative supplying the datastream type and the backing file
 * @param owner owner value recorded on the datastream; passed through unchanged (may be null)
 * @return datastream populated from the derivative's type and file details
 */
private DatastreamImpl createDatastream(DerivativeService.Derivative derivative, String owner) {
    DatastreamType dsType = derivative.getType();
    File derivFile = derivative.getFile();
    // Kept as a boxed Long so the same DatastreamImpl constructor is selected
    Long size = derivFile.length();

    return new DatastreamImpl(
            owner,
            dsType.getId(),
            size,
            dsType.getMimetype(),
            derivFile.getName(),
            dsType.getExtension(),
            null,
            null);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,4 +180,42 @@ public void testWorkWithoutMemberOrder() {
assertTrue(listCaptor.getValue().contains(FacetConstants.MEMBERS_ARE_UNORDERED));
assertFalse(listCaptor.getValue().contains(FacetConstants.MEMBERS_ARE_ORDERED));
}

@Test
public void testWorkWithAssignedThumbnail() throws Exception {
    // The work's resource reports that it has a useAsThumbnail property
    when(resc.hasProperty(Cdr.useAsThumbnail)).thenReturn(true);
    when(workObj.getResource()).thenReturn(resc);
    when(dip.getContentObject()).thenReturn(workObj);

    filter.filter(dip);

    verify(idb).setContentStatus(listCaptor.capture());
    var statuses = listCaptor.getValue();
    assertTrue(statuses.contains(FacetConstants.THUMBNAIL_ASSIGNED));
    assertFalse(statuses.contains(FacetConstants.NO_THUMBNAIL_ASSIGNED));
}

@Test
public void testWorkNoAssignedThumbnail() throws Exception {
    // No useAsThumbnail stubbing: the work's resource reports no assigned thumbnail
    when(dip.getContentObject()).thenReturn(workObj);
    when(workObj.getResource()).thenReturn(resc);

    filter.filter(dip);

    verify(idb).setContentStatus(listCaptor.capture());
    assertTrue(listCaptor.getValue().contains(FacetConstants.NO_THUMBNAIL_ASSIGNED));
    // The two thumbnail statuses are mutually exclusive, so the positive one must be absent
    assertFalse(listCaptor.getValue().contains(FacetConstants.THUMBNAIL_ASSIGNED));
}

@Test
public void testIsAssignedThumbnail() throws Exception {
    // The object being indexed is the file object
    when(fileObj.getResource()).thenReturn(fileResc);
    when(dip.getContentObject()).thenReturn(fileObj);

    // The work's resource names the file's resource as its useAsThumbnail target
    when(resc.hasProperty(Cdr.useAsThumbnail, fileResc)).thenReturn(true);
    when(workObj.getResource()).thenReturn(resc);

    filter.filter(dip);

    verify(idb).setContentStatus(listCaptor.capture());
    var statuses = listCaptor.getValue();
    assertTrue(statuses.contains(FacetConstants.IS_ASSIGNED_THUMBNAIL));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.mockito.Mock;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
Expand All @@ -45,6 +46,7 @@
import static edu.unc.lib.boxc.model.fcrepo.ids.DatastreamPids.getOriginalFilePid;
import static org.apache.jena.rdf.model.ResourceFactory.createResource;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
Expand Down Expand Up @@ -315,6 +317,68 @@ public void workObjectWithoutPrimaryObjectTest() throws Exception {
assertNotNull(idb.getFilesizeTotal());
}

@Test
public void workObjectWithThumbnailNoPrimaryObjectTest() throws Exception {
    // File object that will act as the assigned thumbnail, with its derivatives on disk
    String fileId = "055ed112-f548-479e-ab4b-bf1aad40d470";
    PID filePid = PIDs.get(fileId);
    when(fileObj.getPid()).thenReturn(filePid);
    when(binObj.getPid()).thenReturn(getOriginalFilePid(filePid));
    setUpDerivatives(filePid);

    // Work with a thumbnail object stubbed but no primary object stubbed
    WorkObject work = mock(WorkObject.class);
    when(work.getPid()).thenReturn(pid);
    when(work.getThumbnailObject()).thenReturn(fileObj);
    dip.setContentObject(work);

    filter.filter(dip);

    assertNotNull(idb.getDatastream());
    assertNull(idb.getFilesizeSort());
    assertNotNull(idb.getFilesizeTotal());
    assertThumbnailDatastreams(fileId);
}

@Test
public void workObjectTestWithPrimaryAndThumbnailObjects() throws Exception {
sharonluong marked this conversation as resolved.
Show resolved Hide resolved
WorkObject workObj = mock(WorkObject.class);
when(workObj.getPrimaryObject()).thenReturn(fileObj);
when(workObj.getPid()).thenReturn(pid);
addMetadataDatastreams(workObj);

dip.setContentObject(workObj);

String fileId = "055ed112-f548-479e-ab4b-bf1aad40d470";
PID filePid = PIDs.get(fileId);
when(fileObj.getPid()).thenReturn(filePid);
when(binObj.getPid()).thenReturn(getOriginalFilePid(filePid));
setUpDerivatives(filePid);

// set up thumbnail file object
FileObject thumbnailObj = mock(FileObject.class);
when(workObj.getThumbnailObject()).thenReturn(thumbnailObj);
String thumbnailId = "066ed112-f548-479e-ab4b-bf1aad40d678";
PID thumbnailPid = PIDs.get(thumbnailId);
when(thumbnailObj.getPid()).thenReturn(thumbnailPid);
setUpDerivatives(thumbnailPid);

filter.filter(dip);

assertContainsDatastream(idb.getDatastream(), ORIGINAL_FILE.getId(),
FILE_SIZE, FILE_MIMETYPE, FILE_NAME, FILE_DIGEST, fileId, null);
assertThumbnailDatastreams(thumbnailId);
assertContainsMetadataDatastreams(idb.getDatastream());

// Sort size is based off primary object's size
assertEquals(FILE_SIZE, (long) idb.getFilesizeSort());
// Work has no datastreams of its own
assertEquals(FILE2_SIZE + MODS_SIZE + PREMIS_SIZE, (long) idb.getFilesizeTotal());

assertDoesNotContainDatastream(idb.getDatastream(), THUMBNAIL_SMALL.getId(),
7l, THUMBNAIL_SMALL.getMimetype(), "small.png", null, fileId, null);
assertDoesNotContainDatastream(idb.getDatastream(), THUMBNAIL_LARGE.getId(),
13l, THUMBNAIL_LARGE.getMimetype(), "large.png", null, fileId, null);
}

@Test
public void folderObjectWithMetadataTest() throws Exception {
FolderObject folderObj = mock(FolderObject.class);
Expand Down Expand Up @@ -389,6 +453,17 @@ private void assertContainsDatastream(List<String> values, String name, long fil
assertTrue(values.contains(joined), "Did not contain datastream " + name);
}

/**
 * Asserts that no indexed datastream value matches the pipe-delimited entry built from the
 * given fields. Null fields are rendered as empty strings, and the extension is taken from
 * the text after the last '.' in the filename.
 */
private void assertDoesNotContainDatastream(List<String> values, String name, long filesize, String mimetype,
        String filename, String digest, String owner, String extent) {
    int lastDot = filename.lastIndexOf('.');
    String extension = filename.substring(lastDot + 1);

    Object[] fields = {name, mimetype, filename, extension, filesize, digest, owner, extent};
    StringBuilder entry = new StringBuilder();
    for (int i = 0; i < fields.length; i++) {
        if (i > 0) {
            entry.append('|');
        }
        if (fields[i] != null) {
            entry.append(fields[i]);
        }
    }

    assertFalse(values.contains(entry.toString()), "Contains datastream " + name);
}

private void addMetadataDatastreams(ContentObject obj) throws Exception {
BinaryObject fitsBin = mock(BinaryObject.class);
when(fitsBin.getPid()).thenReturn(DatastreamPids.getTechnicalMetadataPid(pid));
Expand All @@ -415,4 +490,23 @@ private void assertContainsMetadataDatastreams(List<String> values) {
assertContainsDatastream(values, DatastreamType.MD_EVENTS.getId(),
PREMIS_SIZE, PREMIS_MIMETYPE, PREMIS_NAME, PREMIS_DIGEST, null, null);
}

/**
 * Writes small and large thumbnail files into the derivative directory and stubs the
 * derivative service to return them for the given pid.
 *
 * @param filePid pid the derivatives are registered under
 * @throws IOException if a derivative file cannot be written
 */
private void setUpDerivatives(PID filePid) throws IOException {
    // "content" is 7 bytes in UTF-8
    File smallThumb = derivDir.resolve("small.png").toFile();
    FileUtils.write(smallThumb, "content", "UTF-8");

    // "large content" is 13 bytes in UTF-8
    File largeThumb = derivDir.resolve("large.png").toFile();
    FileUtils.write(largeThumb, "large content", "UTF-8");

    Derivative small = new Derivative(THUMBNAIL_SMALL, smallThumb);
    Derivative large = new Derivative(THUMBNAIL_LARGE, largeThumb);
    List<Derivative> thumbnails = Arrays.asList(small, large);
    when(derivativeService.getDerivatives(filePid)).thenReturn(thumbnails);
}

/**
 * Asserts that the indexed datastreams include the small and large thumbnail entries
 * owned by the given object id.
 *
 * @param thumbnailId id expected in the owner field of both thumbnail datastreams
 */
private void assertThumbnailDatastreams(String thumbnailId) {
    List<String> indexed = idb.getDatastream();
    assertContainsDatastream(indexed, THUMBNAIL_SMALL.getId(),
            7L, THUMBNAIL_SMALL.getMimetype(), "small.png", null, thumbnailId, null);
    assertContainsDatastream(indexed, THUMBNAIL_LARGE.getId(),
            13L, THUMBNAIL_LARGE.getMimetype(), "large.png", null, thumbnailId, null);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,11 @@ FileObject addDataFile(PID filePid, URI storageUri, String filename, String mime
* @return Ordered list of member ids, or an empty list if members aren't ordered.
*/
List<PID> getMemberOrder();

/**
 * Get the thumbnail object for this work if one is assigned, otherwise return null.
 *
 * @return the file object assigned as this work's thumbnail, or null if none is assigned
 */
FileObject getThumbnailObject();
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.vocabulary.DC;
Expand Down Expand Up @@ -102,20 +103,7 @@ public void setPrimaryObject(PID primaryPid) {
*/
@Override
public FileObject getPrimaryObject() {
Resource resc = getResource();
// Find the primary object relation if it is present
Statement primaryStmt = resc.getProperty(Cdr.primaryObject);
if (primaryStmt == null) {
return null;
}

PID primaryPid = PIDs.get(primaryStmt.getResource().getURI());
try {
return driver.getRepositoryObject(primaryPid, FileObject.class);
} catch (TombstoneFoundException e) {
log.debug("Cannot retrieve primary object for {}", getPid().getId(), e);
}
return null;
return getFileObjectByProperty(Cdr.primaryObject);
}

@Override
Expand Down Expand Up @@ -214,4 +202,26 @@ public List<PID> getMemberOrder() {
.map(PIDs::get)
.collect(Collectors.toList());
}

@Override
public FileObject getThumbnailObject() {
sharonluong marked this conversation as resolved.
Show resolved Hide resolved
return getFileObjectByProperty(Cdr.useAsThumbnail);
}

/**
 * Retrieves the file object this work references through the given property.
 *
 * @param property property on this work's resource whose value identifies the file object
 * @return the referenced file object, or null if the property is not present or a
 *         tombstone was found when retrieving the object
 */
private FileObject getFileObjectByProperty(Property property) {
    Statement relation = getResource().getProperty(property);
    if (relation == null) {
        // The relation is not present on this work
        return null;
    }

    PID targetPid = PIDs.get(relation.getResource().getURI());
    try {
        return driver.getRepositoryObject(targetPid, FileObject.class);
    } catch (TombstoneFoundException e) {
        log.debug("Cannot retrieve {} object for {}", property.getLocalName(), getPid().getId(), e);
        return null;
    }
}
}
Loading