diff --git a/src/main/java/com/github/dbmdz/solrocr/reader/BaseSourceReader.java b/src/main/java/com/github/dbmdz/solrocr/reader/BaseSourceReader.java index b4c4c6e1..52b66aaf 100644 --- a/src/main/java/com/github/dbmdz/solrocr/reader/BaseSourceReader.java +++ b/src/main/java/com/github/dbmdz/solrocr/reader/BaseSourceReader.java @@ -23,21 +23,21 @@ public abstract class BaseSourceReader implements SourceReader { * Array with a slot for every possible section in the source, of which only {@link * BaseSourceReader#maxCacheEntries} slots will ever be non-null */ - private CachedSection[] cache; + CachedSection[] cache; /** * Array of length {@link BaseSourceReader#maxCacheEntries} with the indexes of the sections that * are currently cached */ - private int[] cachedSectionIdxes; + int[] cachedSectionIdxes; - private int cacheSlotsUsed = 0; + int cacheSlotsUsed = 0; private enum AdjustDirection { LEFT, RIGHT } - private static final class CachedSection { + static final class CachedSection { public final Section section; public long lastUsedTimestampNs; diff --git a/src/main/java/solrocr/OcrHighlightComponent.java b/src/main/java/solrocr/OcrHighlightComponent.java index 4fb599cc..7390ff63 100644 --- a/src/main/java/solrocr/OcrHighlightComponent.java +++ b/src/main/java/solrocr/OcrHighlightComponent.java @@ -91,12 +91,23 @@ public void inform(SolrCore core) { Integer.parseInt(info.attributes.getOrDefault("maxQueuedPerThread", "8")); int sectionReadSize = Integer.parseInt(info.attributes.getOrDefault("sectionReadSizeKiB", "8")) * 1024; + if (sectionReadSize <= 0) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "Invalid sectionReadSizeKiB, must be > 0: " + sectionReadSize); + } int maxSectionCacheSize = - Integer.parseInt(info.attributes.getOrDefault("maxSectionCacheSizeKiB", "64")) * 1024; + Integer.parseInt(info.attributes.getOrDefault("maxSectionCacheSizeKiB", "-1")) * 1024; + if (maxSectionCacheSize < 0) { + maxSectionCacheSize = sectionReadSize * 10; + 
} this.ocrHighlighter = new SolrOcrHighlighter( - numHlThreads, maxQueuedPerThread, sectionReadSize, maxSectionCacheSize); + numHlThreads, + maxQueuedPerThread, + sectionReadSize, + (int) Math.ceil((double) maxSectionCacheSize / sectionReadSize)); } @Override diff --git a/src/test/java/com/github/dbmdz/solrocr/reader/FileSourceReaderTest.java b/src/test/java/com/github/dbmdz/solrocr/reader/FileSourceReaderTest.java index 766c4605..8b88bc10 100644 --- a/src/test/java/com/github/dbmdz/solrocr/reader/FileSourceReaderTest.java +++ b/src/test/java/com/github/dbmdz/solrocr/reader/FileSourceReaderTest.java @@ -19,6 +19,28 @@ class FileSourceReaderTest { private final SourcePointer pointer = SourcePointer.parse(filePath.toString()); private final int maxCacheEntries = 10; + @Test + void shouldCacheSectionsProperly() throws IOException { + FileSourceReader reader = new FileSourceReader(filePath, pointer, 8192, 3); + reader.getAsciiSection(128); + assertThat(reader.cachedSectionIdxes).containsExactlyInAnyOrder(-1, -1, 0); + assertThat(reader.cache[0].section.start).isEqualTo(0); + assertThat(reader.cache[0].section.end).isEqualTo(8192); + reader.getAsciiSection(8192 + 128); + assertThat(reader.cachedSectionIdxes).containsExactlyInAnyOrder(-1, 1, 0); + assertThat(reader.cache[1]).isNotNull(); + reader.getAsciiSection(2 * 8192 + 128); + assertThat(reader.cachedSectionIdxes).containsExactlyInAnyOrder(2, 1, 0); + assertThat(reader.cache[2]).isNotNull(); + // Test cache eviction + reader.getAsciiSection(3 * 8192 + 128); + assertThat(reader.cachedSectionIdxes).containsExactlyInAnyOrder(2, 1, 3); + assertThat(reader.cache[0]).isNull(); + reader.getAsciiSection(16 * 8192 + 128); + assertThat(reader.cachedSectionIdxes).containsExactlyInAnyOrder(2, 16, 3); + assertThat(reader.cache[1]).isNull(); + } + @Test void shouldReadUtf8StringCorrectly() throws IOException { SourceReader reader = new FileSourceReader(filePath, pointer, 8192, maxCacheEntries);