Skip to content

Commit

Permalink
Merge pull request #1642 from UNC-Libraries/bxc-4376-virus
Browse files Browse the repository at this point in the history
BXC-4376 - Virus scan for large files
  • Loading branch information
sharonluong authored Dec 13, 2023
2 parents 1a929e7 + b674317 commit dc7dea2
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public class VirusScanJob extends AbstractConcurrentDepositJob {
.getLogger(VirusScanJob.class);

private static final int MAX_RETRIES = 5;
private long maxStreamSize;

private ClamAVClient clamClient;

Expand Down Expand Up @@ -97,18 +98,20 @@ public void runJob() {
Path file = Paths.get(fileURI);

ScanResult result;
// Clamd is unable to find files with unicode characters in their path
if (charactersInBoundsForClam(file)) {
result = clamClient.scanWithResult(file);
} else {
// Scan files with unicode in their paths via streaming
try {
try {
if (shouldScanByPath(file)) {
// Scan entire file by path
log.debug("Scanning file {} by path", file);
result = clamClient.scanWithResult(file);
} else {
// Scanning via InputStream up to the max number of bytes
log.debug("Scanning file {} by stream", file);
result = clamClient.scanWithResult(Files.newInputStream(file));
} catch (IOException e) {
failures.put(fileURI.toString(), "Failed to scan file");
log.error("Unable to scan file {}", file, e);
return;
}
} catch (IOException e) {
failures.put(fileURI.toString(), "Failed to scan file");
log.error("Unable to scan file {}", file, e);
return;
}

switch (result.getStatus()) {
Expand Down Expand Up @@ -180,8 +183,23 @@ private boolean charactersInBoundsForClam(Path path) {
return CharMatcher.ascii().matchesAllOf(path.toString());
}

/**
* Determines if we should scan a file by its file path or use streaming. Files larger than the scanning
* limit or with characters in their path that clamd can't handle will return false.
* @param path
* @return
* @throws IOException
*/
private boolean shouldScanByPath(Path path) throws IOException {
return Files.size(path) < this.maxStreamSize && charactersInBoundsForClam(path);
}

// unused, no results to flush
@Override
protected void registrationAction() {
}
}

public void setMaxStreamSize(long maxStreamSize) {
this.maxStreamSize = maxStreamSize;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@
<property name="clamClient" ref="clamClient" />
<property name="maxQueuedJobs" value="${job.fileValidation.maxQueuedJobs:5}" />
<property name="executorService" ref="fileValidationExecutor" />
<property name="maxStreamSize" value="${clamd.maxStreamSize:64000000}" />
</bean>

<bean id="FixityCheckJob" class="edu.unc.lib.boxc.deposit.validate.FixityCheckJob"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,44 +1,10 @@
package edu.unc.lib.boxc.deposit.validate;

import static edu.unc.lib.boxc.common.test.TestHelpers.setField;
import static org.junit.jupiter.api.Assertions.fail;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyBoolean;
import static org.mockito.Matchers.anyInt;
import static org.mockito.Matchers.anyString;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.io.File;
import java.io.InputStream;
import java.nio.file.Path;
import java.util.UUID;
import java.util.concurrent.ExecutorService;

import org.apache.commons.io.FileUtils;
import org.apache.jena.rdf.model.Bag;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.vocabulary.RDF;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mock;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

import com.google.common.util.concurrent.MoreExecutors;

import edu.unc.lib.boxc.common.util.URIUtil;
import edu.unc.lib.boxc.deposit.api.RedisWorkerConstants.DepositState;
import edu.unc.lib.boxc.deposit.fcrepo4.AbstractDepositJobTest;
import edu.unc.lib.boxc.deposit.impl.model.DepositModelHelpers;
import edu.unc.lib.boxc.deposit.validate.VirusScanJob;
import edu.unc.lib.boxc.deposit.work.JobFailedException;
import edu.unc.lib.boxc.deposit.work.JobInterruptedException;
import edu.unc.lib.boxc.model.api.exceptions.RepositoryException;
Expand All @@ -51,6 +17,37 @@
import fi.solita.clamav.ClamAVClient;
import fi.solita.clamav.ScanResult;
import fi.solita.clamav.ScanResult.Status;
import org.apache.commons.io.FileUtils;
import org.apache.jena.rdf.model.Bag;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.vocabulary.RDF;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mock;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

import java.io.File;
import java.io.InputStream;
import java.nio.file.Path;
import java.util.UUID;
import java.util.concurrent.ExecutorService;

import static edu.unc.lib.boxc.common.test.TestHelpers.setField;
import static org.junit.jupiter.api.Assertions.fail;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyBoolean;
import static org.mockito.Matchers.anyInt;
import static org.mockito.Matchers.anyString;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

/**
*
Expand Down Expand Up @@ -92,6 +89,7 @@ public PID answer(InvocationOnMock invocation) throws Throwable {
});

when(clamClient.scanWithResult(any(Path.class))).thenReturn(scanResult);
when(clamClient.scanWithResult(any(InputStream.class))).thenReturn(scanResult);

File examplesFile = new File("src/test/resources/examples");
FileUtils.copyDirectory(examplesFile, depositDir);
Expand All @@ -104,6 +102,7 @@ private void initializeJob() {
job.setJobUUID(jobUUID);
job.setDepositUUID(depositUUID);
job.setDepositDirectory(depositDir);
job.setMaxStreamSize(300l);
setField(job, "pidMinter", pidMinter);
job.setClamClient(clamClient);
job.setPremisLoggerFactory(premisLoggerFactory);
Expand Down Expand Up @@ -142,7 +141,8 @@ public void passScanTest() throws Exception {

job.run();

verify(clamClient, times(3)).scanWithResult(any(Path.class));
verify(clamClient, times(1)).scanWithResult(any(InputStream.class));
verify(clamClient, times(2)).scanWithResult(any(Path.class));

verify(jobStatusFactory).setTotalCompletion(eq(jobUUID), eq(3));
verify(jobStatusFactory, times(3)).incrCompletion(eq(jobUUID), eq(1));
Expand Down Expand Up @@ -189,9 +189,8 @@ public void failOneScanTest() throws Exception {
when(result2.getStatus()).thenReturn(Status.FOUND);
File pdfFile = new File(depositDir, "pdf.pdf");
File textFile = new File(depositDir, "text.txt");
when(clamClient.scanWithResult(any(Path.class)))
.thenReturn(scanResult)
.thenReturn(result2);
when(clamClient.scanWithResult(any(InputStream.class))).thenReturn(scanResult);
when(clamClient.scanWithResult(any(Path.class))).thenReturn(result2);

Model model = job.getWritableModel();
Bag depBag = model.createBag(depositPid.getRepositoryPath());
Expand Down

0 comments on commit dc7dea2

Please sign in to comment.