Skip to content

Commit

Permalink
BXC-4786 - Custom alt text (#1849)
Browse files Browse the repository at this point in the history
* Add enums for alt text datastreams, permissions. Add test coverage for datastream classes

* Add minimal service for updating alt text. Add minimal controller for invoking the service. Add relation

* Add alt text field to solr and add filter for indexing it. Add facet values to track when it is present. Add tests, plus some for cases that weren't previously covered

* Add serialization of alt text field. Some cleanup of serialization, and added some additional test coverage

* Add admin ui form for setting alt text

* Fix how alt text is retrieved. Add missing annotation so that alt text indexes. Add alt text to search result field lists

* Only submit text as body param, otherwise text gets duplicated

* Use custom alt text for thumbnails if available

* Start adding method for getting thumbnail record, so we can get the alt text at the same time

* Add alt text during deposit. Make sending a message configurable, and allow for shared transfer session

* Populate alt text for works from thumbnail object. Change accessCopiesService methods to work with object records and set alt text

* Codeclimate and add missing property injection

* Address comments and make input larger

* Use different verb for link text versus image text, but use custom alt text for both
  • Loading branch information
bbpennel authored Dec 6, 2024
1 parent 8bd7d00 commit 765809a
Show file tree
Hide file tree
Showing 51 changed files with 1,505 additions and 115 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ public class DatastreamPermissionUtil {
DS_PERMISSION_MAP.put(DatastreamType.JP2_ACCESS_COPY, Permission.viewAccessCopies);
DS_PERMISSION_MAP.put(DatastreamType.AUDIO_ACCESS_COPY, Permission.viewAccessCopies);
DS_PERMISSION_MAP.put(DatastreamType.ACCESS_SURROGATE, Permission.viewAccessCopies);
DS_PERMISSION_MAP.put(DatastreamType.ALT_TEXT, Permission.viewMetadata);
DS_PERMISSION_MAP.put(DatastreamType.ALT_TEXT_HISTORY, Permission.viewHidden);
DS_PERMISSION_MAP.put(DatastreamType.MD_DESCRIPTIVE, Permission.viewMetadata);
DS_PERMISSION_MAP.put(DatastreamType.MD_DESCRIPTIVE_HISTORY, Permission.viewHidden);
DS_PERMISSION_MAP.put(DatastreamType.MD_EVENTS, Permission.viewHidden);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
Expand All @@ -25,6 +26,8 @@
import java.util.Set;

import edu.unc.lib.boxc.model.api.exceptions.NotFoundException;
import edu.unc.lib.boxc.operations.impl.altText.AltTextUpdateService;
import edu.unc.lib.boxc.operations.jms.altText.AltTextUpdateRequest;
import org.apache.http.HttpStatus;
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.rdf.model.Bag;
Expand Down Expand Up @@ -137,6 +140,9 @@ public class IngestContentObjectsJob extends AbstractDepositJob {
@Autowired
private UpdateDescriptionService updateDescService;

@Autowired
private AltTextUpdateService altTextUpdateService;

private AccessGroupSet groupSet;
private AgentPrincipals agent;

Expand Down Expand Up @@ -342,6 +348,8 @@ private void ingestFileObject(ContentObject parent, Resource parentResc, Resourc
addPremisEvents(obj);
// add MODS
addDescription(obj, childResc);
// Add alt text if present
addAltText(obj);

overrideModifiedTimestamp(obj, childResc);
log.debug("Finished all updates for file {} in work {}", pid, work.getPid());
Expand Down Expand Up @@ -824,6 +832,21 @@ private void addAclProperties(Resource dResc, Resource aResc) {
}
}

/**
 * Stores alt text for the given object when an alt text file exists for its
 * pid in the deposit. Does nothing when no such file is present.
 *
 * @param obj object the alt text belongs to
 * @throws IOException if the alt text file cannot be read
 */
private void addAltText(ContentObject obj) throws IOException {
    Path textFile = getAltTextPath(obj.getPid(), false);
    if (Files.exists(textFile)) {
        // File contents are read as UTF-8 and passed along unmodified
        var contents = Files.readString(textFile, StandardCharsets.UTF_8);

        var updateRequest = new AltTextUpdateRequest();
        updateRequest.setAgent(agent);
        updateRequest.setPidString(obj.getPid().getId());
        updateRequest.setAltText(contents);
        updateRequest.setTransferSession(logTransferSession);

        altTextUpdateService.updateAltText(updateRequest);
    }
}

private void addDescription(ContentObject obj, Resource dResc) throws IOException {
addDescriptionHistory(obj, dResc);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package edu.unc.lib.boxc.deposit.work;

import static edu.unc.lib.boxc.deposit.api.DepositConstants.ALT_TEXT_DIR;
import static edu.unc.lib.boxc.deposit.api.DepositConstants.DESCRIPTION_DIR;
import static edu.unc.lib.boxc.deposit.api.DepositConstants.HISTORY_DIR;
import static edu.unc.lib.boxc.deposit.api.DepositConstants.TECHMD_DIR;
Expand Down Expand Up @@ -239,6 +240,10 @@ public String getDepositField(DepositField field) {
return getDepositStatus().get(field.name());
}

/**
 * @return the alt text directory located under this deposit's directory
 */
public File getAltTextDir() {
    var depositDir = getDepositDirectory();
    return new File(depositDir, ALT_TEXT_DIR);
}

/**
 * @return the description directory located under this deposit's directory
 */
public File getDescriptionDir() {
    var depositDir = getDepositDirectory();
    return new File(depositDir, DESCRIPTION_DIR);
}
Expand Down Expand Up @@ -268,6 +273,16 @@ public Path getModsPath(PID pid, boolean createDirs) {
return getMetadataPath(getDescriptionDir(), pid, ".xml", createDirs);
}

/**
 * Resolve the path of the alt text file for the given pid. The file lives
 * in the alt text directory and uses a ".txt" extension.
 *
 * @param pid pid of the object
 * @param createDirs if true, then parent directories for path will be created
 * @return Path for the alt text
 */
public Path getAltTextPath(PID pid, boolean createDirs) {
    File altTextDir = getAltTextDir();
    return getMetadataPath(altTextDir, pid, ".txt", createDirs);
}

/**
* Get path to where MODS history should be stored
*
Expand Down
8 changes: 8 additions & 0 deletions deposit-app/src/main/webapp/WEB-INF/deposit-jobs-context.xml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,14 @@
<property name="versionedDatastreamService" ref="versionedDatastreamService" />
</bean>

<!-- Service used to update alt text on repository objects. sendsMessages is
     disabled in this context; presumably so that alt text updates made here
     do not emit messages (TODO confirm against AltTextUpdateService). -->
<bean id="altTextUpdateService" class="edu.unc.lib.boxc.operations.impl.altText.AltTextUpdateService">
<property name="aclService" ref="aclService" />
<property name="repositoryObjectLoader" ref="repositoryObjectLoader" />
<property name="repositoryObjectFactory" ref="repositoryObjectFactory" />
<property name="versionedDatastreamService" ref="versionedDatastreamService" />
<property name="sendsMessages" value="false" />
</bean>

<bean id="metsSipSchema" class="edu.unc.lib.boxc.common.xml.SecureXMLFactory"
factory-method="createSchema">
<constructor-arg ref="schemaFactory" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import static edu.unc.lib.boxc.model.api.StreamingConstants.STREAMREAPER_PREFIX_URL;
import static edu.unc.lib.boxc.persist.impl.storage.StorageLocationTestHelper.LOC1_ID;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import static org.mockito.ArgumentMatchers.any;
Expand All @@ -33,6 +34,8 @@
import java.util.Map;

import edu.unc.lib.boxc.model.api.exceptions.NotFoundException;
import edu.unc.lib.boxc.operations.impl.altText.AltTextUpdateService;
import edu.unc.lib.boxc.operations.jms.altText.AltTextUpdateRequest;
import org.apache.commons.io.FileUtils;
import org.apache.http.HttpStatus;
import org.apache.jena.rdf.model.Bag;
Expand Down Expand Up @@ -159,8 +162,12 @@ public class IngestContentObjectsJobTest extends AbstractDepositJobTest {
private BinaryTransferSession mockTransferSession;
@Mock
private UpdateDescriptionService updateDescService;
@Mock
private AltTextUpdateService altTextUpdateService;
@Captor
private ArgumentCaptor<Model> modelCaptor;
@Captor
private ArgumentCaptor<AltTextUpdateRequest> altTextRequestCaptor;

private Path storageLocPath;

Expand Down Expand Up @@ -192,6 +199,7 @@ public void init() throws Exception {
setField(job, "locationManager", storageLocationManager);
setField(job, "updateDescService", updateDescService);
setField(job, "depositModelManager", depositModelManager);
setField(job, "altTextUpdateService", altTextUpdateService);

job.init();

Expand Down Expand Up @@ -379,7 +387,6 @@ public void ingestWorkWithFileWithStreamingPropertiesAndOriginalFile() throws Ex
fileResc.addProperty(CdrDeposit.mimetype, "text/plain");
fileResc.addProperty(Cdr.streamingUrl, STREAMREAPER_PREFIX_URL);
fileResc.addProperty(Cdr.streamingType, STREAMING_TYPE);
workBag.add(fileResc);

job.closeModel();

Expand All @@ -394,7 +401,45 @@ public void ingestWorkWithFileWithStreamingPropertiesAndOriginalFile() throws Ex
verify(repoObjFactory).createWorkObject(eq(workPid), any(Model.class));
verify(destinationObj).addMember(eq(work));

verify(jobStatusFactory, times(3)).incrCompletion(eq(jobUUID), eq(1));
verify(jobStatusFactory, times(2)).incrCompletion(eq(jobUUID), eq(1));
}

/**
 * Ingests a work containing a single image file for which an alt text file
 * is present in the deposit, and verifies that the alt text update service
 * is called with that text and the file's pid.
 */
@Test
public void ingestWorkWithFileWithAltText() throws Exception {
PID workPid = makePid(RepositoryPathConstants.CONTENT_BASE);
WorkObject work = mock(WorkObject.class);
Bag workBag = setupWork(workPid, work);

String loc = "image.jpg";
String mime = "image/jpeg";
PID filePid = addFileObject(workBag, loc, mime);

var fileResc = model.getResource(filePid.getRepositoryPath());
fileResc.addProperty(RDF.type, Cdr.FileObject);
fileResc.addProperty(CdrDeposit.mimetype, mime);

// Write the alt text file at the path the job will check for this file's pid
Path altTextPath = job.getAltTextPath(filePid, true);
FileUtils.writeStringToFile(altTextPath.toFile(), "Alternative text", UTF_8);

job.closeModel();

// Stub file creation and work lookup before running the job
when(work.addDataFile(any(PID.class), any(URI.class),
anyString(), anyString(), isNull(), isNull(), any(Model.class)))
.thenReturn(mockFileObj);
when(mockFileObj.getPid()).thenReturn(filePid);
when(repoObjLoader.getWorkObject(eq(workPid))).thenReturn(work);

job.run();

verify(repoObjFactory).createWorkObject(eq(workPid), any(Model.class));
verify(destinationObj).addMember(eq(work));

verify(jobStatusFactory, times(2)).incrCompletion(eq(jobUUID), eq(1));

// The captured request must carry the file contents and the file's pid
verify(altTextUpdateService).updateAltText(altTextRequestCaptor.capture());
AltTextUpdateRequest request = altTextRequestCaptor.getValue();
assertEquals("Alternative text", request.getAltText());
assertEquals(filePid.getId(), request.getPidString());
}

/**
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,14 @@
<property name="checksAccess" value="false" />
<property name="versionedDatastreamService" ref="versionedDatastreamService" />
</bean>

<!-- Service used to update alt text on repository objects. sendsMessages is
     disabled in this context; presumably so that alt text updates made here
     do not emit messages (TODO confirm against AltTextUpdateService). -->
<bean id="altTextUpdateService" class="edu.unc.lib.boxc.operations.impl.altText.AltTextUpdateService">
<property name="aclService" ref="aclService" />
<property name="repositoryObjectLoader" ref="repositoryObjectLoader" />
<property name="repositoryObjectFactory" ref="repositoryObjectFactory" />
<property name="versionedDatastreamService" ref="versionedDatastreamService" />
<property name="sendsMessages" value="false" />
</bean>

<bean id="transactionManager" class="edu.unc.lib.boxc.fcrepo.utils.TransactionManager">
<property name="client" ref="fcrepoClient" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/
public class DepositConstants {
public static final String DESCRIPTION_DIR = "description";
public static final String ALT_TEXT_DIR = "altText";
public static final String HISTORY_DIR = "history";
public static final String JENA_TDB_DIR = "jena-tdb-model";
public static final String EVENTS_FILE = "events.xml";
Expand Down
3 changes: 3 additions & 0 deletions etc/solr-config/access/conf/schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@
<field name="abstract" type="string" indexed="false" stored="true"/>
<!-- Groups which have administrative viewing rights to this object -->
<field name="adminGroup" type="string" indexed="true" stored="true" required="true" multiValued="true" default="" docValues="true"/>
<field name="altText" type="string" indexed="false" stored="true"/>
<!-- Sortable string representation of the path to this object. This object is included if it is a container -->
<field name="ancestorIds" type="string" indexed="true" stored="true" docValues="true"/>
<!--
Expand Down Expand Up @@ -291,6 +292,7 @@
<copyField source="rightsOaiPmh" dest="keywordIndex"/>
<copyField source="rightsUri" dest="keywordIndex"/>
<copyField source="genre" dest="keywordIndex"/>
<copyField source="altText" dest="keywordIndex"/>

<!-- Same as the keyword index plus full text. Too bad recursive copy fields don't work -->
<copyField source="title" dest="text"/>
Expand All @@ -310,4 +312,5 @@
<copyField source="rightsUri" dest="text"/>
<copyField source="fullText" dest="text"/>
<copyField source="genre" dest="text"/>
<copyField source="altText" dest="text"/>
</schema>
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package edu.unc.lib.boxc.indexing.solr.filter;

import edu.unc.lib.boxc.indexing.solr.exception.IndexingException;
import edu.unc.lib.boxc.indexing.solr.indexing.DocumentIndexingPackage;
import edu.unc.lib.boxc.model.api.exceptions.NotFoundException;
import edu.unc.lib.boxc.model.api.objects.ContentObject;
import edu.unc.lib.boxc.model.api.objects.FileObject;
import edu.unc.lib.boxc.model.api.objects.RepositoryObjectLoader;
import edu.unc.lib.boxc.model.fcrepo.ids.DatastreamPids;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

import static java.nio.charset.StandardCharsets.UTF_8;

/**
 * Filter which populates alt text for the object being indexed.
 *
 * Only file objects are handled. The alt text is read as UTF-8 from the
 * object's alt text datastream; if that datastream does not exist the
 * document is left unchanged.
 *
 * @author bbpennel
 */
public class SetAltTextFilter implements IndexDocumentFilter {
    private static final Logger log = LoggerFactory.getLogger(SetAltTextFilter.class);
    private RepositoryObjectLoader repositoryObjectLoader;

    /**
     * Sets the alt text field on the document being indexed, when the object
     * is a file object that has an alt text datastream.
     *
     * @param dip indexing package for the object being indexed
     * @throws IndexingException if the alt text datastream cannot be read
     */
    @Override
    public void filter(DocumentIndexingPackage dip) throws IndexingException {
        ContentObject contentObj = dip.getContentObject();
        // object being indexed must be a file object
        if (!(contentObj instanceof FileObject)) {
            return;
        }

        try {
            var altTextPid = DatastreamPids.getAltTextPid(contentObj.getPid());
            var altTextBinary = repositoryObjectLoader.getBinaryObject(altTextPid);
            // IOUtils.toString does not close the stream, so close it explicitly
            try (var altTextStream = altTextBinary.getBinaryStream()) {
                var altText = IOUtils.toString(altTextStream, UTF_8);
                dip.getDocument().setAltText(altText);
            }
        } catch (NotFoundException e) {
            // Absence of alt text is expected for most objects, so this is not an error
            log.debug("No alt text datastream found for {}", dip.getPid());
        } catch (IOException e) {
            throw new IndexingException("Failed to retrieve alt text datastream for " + dip.getPid(), e);
        }
    }

    /**
     * @param repositoryObjectLoader loader used to retrieve the alt text binary object
     */
    public void setRepositoryObjectLoader(RepositoryObjectLoader repositoryObjectLoader) {
        this.repositoryObjectLoader = repositoryObjectLoader;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,28 +56,37 @@ private List<String> determineContentStatus(DocumentIndexingPackage dip)
}

if (obj instanceof FileObject) {
Resource parentResc = obj.getParent().getResource();
if (parentResc.hasProperty(Cdr.primaryObject, resc)) {
status.add(FacetConstants.IS_PRIMARY_OBJECT);
}
if (parentResc.hasProperty(Cdr.useAsThumbnail, resc)) {
status.add(FacetConstants.ASSIGNED_AS_THUMBNAIL);
}
if (hasAccessSurrogate(obj.getPid())) {
status.add(FacetConstants.HAS_ACCESS_SURROGATE);
} else {
status.add(FacetConstants.NO_ACCESS_SURROGATE);
}
if (resc.hasProperty(Cdr.streamingUrl)) {
status.add(FacetConstants.HAS_STREAMING);
} else {
status.add(FacetConstants.NO_STREAMING);
}
addFileObjectStatuses(obj, resc, status);
}

return status;
}

/**
 * Appends the content status values that apply to file objects: whether the
 * file is its parent's primary object or assigned thumbnail, and whether it
 * has an access surrogate, a streaming URL, and alt text.
 *
 * @param obj file object being indexed
 * @param resc resource of the file object
 * @param status list of statuses to add to
 */
private void addFileObjectStatuses(ContentObject obj, Resource resc, List<String> status) {
    Resource parent = obj.getParent().getResource();

    // Roles the parent assigns to this file
    if (parent.hasProperty(Cdr.primaryObject, resc)) {
        status.add(FacetConstants.IS_PRIMARY_OBJECT);
    }
    if (parent.hasProperty(Cdr.useAsThumbnail, resc)) {
        status.add(FacetConstants.ASSIGNED_AS_THUMBNAIL);
    }

    // Each of the following always contributes exactly one of a has/no pair
    status.add(hasAccessSurrogate(obj.getPid())
            ? FacetConstants.HAS_ACCESS_SURROGATE
            : FacetConstants.NO_ACCESS_SURROGATE);
    status.add(resc.hasProperty(Cdr.streamingUrl)
            ? FacetConstants.HAS_STREAMING
            : FacetConstants.NO_STREAMING);
    status.add(resc.hasProperty(Cdr.hasAltText)
            ? FacetConstants.HAS_ALT_TEXT
            : FacetConstants.NO_ALT_TEXT);
}

private void addWorkObjectStatuses(List<String> status, Resource resource) {
if (resource.hasProperty(Cdr.primaryObject)) {
status.add(FacetConstants.HAS_PRIMARY_OBJECT);
Expand Down
Loading

0 comments on commit 765809a

Please sign in to comment.