From 8d684cee8e0d95e6a48a78e4370d4247d97f1139 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 25 Mar 2024 10:05:24 +0100 Subject: [PATCH 1/4] Allow `zim::Archive` to be created with a set of File descriptor. --- lib/build.gradle | 3 +- lib/src/main/cpp/libzim/archive.cpp | 65 +++++++++++++++++++ .../main/java/org/kiwix/libzim/Archive.java | 15 +++++ .../main/java/org/kiwix/libzim/FdInput.java | 29 +++++++++ 4 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 lib/src/main/java/org/kiwix/libzim/FdInput.java diff --git a/lib/build.gradle b/lib/build.gradle index f912277..93b6029 100644 --- a/lib/build.gradle +++ b/lib/build.gradle @@ -354,7 +354,8 @@ String getLibzimFiles() { "${projectDir}/src/main/java/org/kiwix/libzim/SuggestionSearcher.java " + "${projectDir}/src/main/java/org/kiwix/libzim/SuggestionSearch.java " + "${projectDir}/src/main/java/org/kiwix/libzim/ZimFileFormatException.java " + - "${projectDir}/src/main/java/org/kiwix/libzim/EntryNotFoundException.java" + "${projectDir}/src/main/java/org/kiwix/libzim/EntryNotFoundException.java " + + "${projectDir}/src/main/java/org/kiwix/libzim/FdInput.java" } task buildLinuxBinding(type: Exec) { diff --git a/lib/src/main/cpp/libzim/archive.cpp b/lib/src/main/cpp/libzim/archive.cpp index 5d61a19..2b976e5 100644 --- a/lib/src/main/cpp/libzim/archive.cpp +++ b/lib/src/main/cpp/libzim/archive.cpp @@ -62,6 +62,22 @@ int jni2fd(const jobject& fdObj, JNIEnv* env) return env->GetIntField(fdObj, field_fd); } +zim::FdInput jni2fdInput(const jobject& fdInputObj, JNIEnv* env) +{ + jclass class_fdesc = env->FindClass("org/kiwix/FdInput"); + jfieldID field_id = env->GetFieldID(class_fdesc, "fd", "java/io/FileDescriptor"); + jobject fdObj = env->GetField(fdInputObj, field_id); + int fd = jni2fd(fdObj, env); + + field_id = env->GetFieldID(class_fdesc, "offset", "J"); + long offset = env->GetLongField(fdObj, field_id); + + field_id = env->GetFieldID(class_fdesc, "size", "J"); + long size = env->GetLongField(fdObj, field_id); + + return zim::FdInput(fd, offset, size); +} + } // unnamed namespace JNIEXPORT void JNICALL Java_org_kiwix_libzim_Archive_setNativeArchiveByFD( @@ -104,6 +120,55 @@ JNIEXPORT void JNICALL Java_org_kiwix_libzim_Archive_setNativeArchiveEmbedded( #endif } CATCH_EXCEPTION() +JNIEXPORT void JNICALL Java_org_kiwix_libzim_Archive_setNativeArchiveEmbeddedFd( + JNIEnv* env, jobject thisObj, jobject fdObj) try +{ +#ifndef _WIN32 + auto fdInput = jni2fdInput(fdObj, env); + + LOG("Attempting to create reader with fd: %d", fdInput); + try { + auto archive = std::make_shared(fdInput); + SET_PTR(archive); + } catch (std::exception& e) { + LOG("Error opening ZIM file"); + LOG("%s", e.what()); + } +#else + jclass exception = env->FindClass("java/lang/UnsupportedOperationException"); + env->ThrowNew(exception, "org.kiwix.libzim.Archive.setNativeArchiveEmbedded() is not supported under Windows"); +#endif +} CATCH_EXCEPTION() + + +JNIEXPORT void JNICALL Java_org_kiwix_libzim_Archive_setNativeArchiveEmbeddedFds( + JNIEnv* env, jobject thisObj, jobjectArray fdsObj) try +{ +#ifndef _WIN32 + + jsize length = env->GetArrayLength(fdsObj); + std::vector v(length); + + int i; + for(i = 0; iGetObjectArrayElement(fdsObj, i); + auto fdInput = jni2fdInput(fdObj, env); + v.push_pack(fdInput); + } + + try { + auto archive = std::make_shared(v); + SET_PTR(archive); + } catch (std::exception& e) { + LOG("Error opening ZIM file"); + LOG("%s", e.what()); + } +#else + jclass exception = env->FindClass("java/lang/UnsupportedOperationException"); + env->ThrowNew(exception, "org.kiwix.libzim.Archive.setNativeArchiveEmbedded() is not supported under Windows"); +#endif +} CATCH_EXCEPTION() + DISPOSE GETTER(jstring, getFilename) diff --git a/lib/src/main/java/org/kiwix/libzim/Archive.java b/lib/src/main/java/org/kiwix/libzim/Archive.java index 7ac9663..a15bf4d 100644 --- a/lib/src/main/java/org/kiwix/libzim/Archive.java +++ b/lib/src/main/java/org/kiwix/libzim/Archive.java @@ -23,6 +23,7 @@ import org.kiwix.libzim.Entry; import org.kiwix.libzim.Item; import org.kiwix.libzim.EntryIterator; +import org.kiwix.libzim.FdInput; import java.io.FileDescriptor; public class Archive @@ -44,6 +45,18 @@ public Archive(FileDescriptor fd, long offset, long size) setNativeArchiveEmbedded(fd, offset, size); } + public Archive(FdInput fd) + throws ZimFileFormatException + { + setNativeArchiveEmbeddedFd(fd); + } + + public Archive(FdInput[] fds) + throws ZimFileFormatException + { + setNativeArchiveEmbeddedFds(fds); + } + public native String getFilename(); public native long getFilesize(); public native int getAllEntryCount(); @@ -94,6 +107,8 @@ public Archive(FileDescriptor fd, long offset, long size) private native void setNativeArchive(String filename); private native void setNativeArchiveByFD(FileDescriptor fd); private native void setNativeArchiveEmbedded(FileDescriptor fd, long offset, long size); + private native void setNativeArchiveEmbeddedFd(FdInput fd); + private native void setNativeArchiveEmbeddedFds(FdInput[] fds); @Override protected void finalize() { dispose(); } diff --git a/lib/src/main/java/org/kiwix/libzim/FdInput.java b/lib/src/main/java/org/kiwix/libzim/FdInput.java new file mode 100644 index 0000000..c8f1c84 --- /dev/null +++ b/lib/src/main/java/org/kiwix/libzim/FdInput.java @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2017 Matthieu Gautier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +package org.kiwix.libzim; + +import java.io.FileDescriptor; + +public class FdInput +{ + public FileDescriptor fd; + public long offset; + public long size; +} From 6c0e60c9c8755824d59a8608539d0ddd2edc01f3 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 22 Apr 2024 11:05:23 +0200 Subject: [PATCH 2/4] fixup! Allow `zim::Archive` to be created with a set of File descriptor. --- lib/src/main/cpp/libzim/archive.cpp | 15 ++++++++------- lib/src/main/java/org/kiwix/libzim/FdInput.java | 6 ++++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/lib/src/main/cpp/libzim/archive.cpp b/lib/src/main/cpp/libzim/archive.cpp index 2b976e5..92e3f92 100644 --- a/lib/src/main/cpp/libzim/archive.cpp +++ b/lib/src/main/cpp/libzim/archive.cpp @@ -64,16 +64,17 @@ int jni2fd(const jobject& fdObj, JNIEnv* env) zim::FdInput jni2fdInput(const jobject& fdInputObj, JNIEnv* env) { - jclass class_fdesc = env->FindClass("org/kiwix/FdInput"); - jfieldID field_id = env->GetFieldID(class_fdesc, "fd", "java/io/FileDescriptor"); - jobject fdObj = env->GetField(fdInputObj, field_id); + jclass class_fdesc = env->FindClass("org/kiwix/libzim/FdInput"); + + jfieldID field_id = env->GetFieldID(class_fdesc, "fd", "Ljava/io/FileDescriptor;"); + jobject fdObj = env->GetObjectField(fdInputObj, field_id); int fd = jni2fd(fdObj, env); field_id = env->GetFieldID(class_fdesc, "offset", "J"); - long offset = env->GetLongField(fdObj, field_id); + long offset = env->GetLongField(fdInputObj, field_id); field_id = env->GetFieldID(class_fdesc, "size", "J"); - long size = env->GetLongField(fdObj, field_id); + long size = env->GetLongField(fdInputObj, field_id); return zim::FdInput(fd, offset, size); } @@ -147,13 +148,13 @@ JNIEXPORT void JNICALL Java_org_kiwix_libzim_Archive_setNativeArchiveEmbeddedFds #ifndef _WIN32 jsize length = env->GetArrayLength(fdsObj); - std::vector v(length); + std::vector v; int i; for(i = 0; iGetObjectArrayElement(fdsObj, i); auto fdInput = jni2fdInput(fdObj, env); - v.push_pack(fdInput); + v.push_back(fdInput); } try { diff --git a/lib/src/main/java/org/kiwix/libzim/FdInput.java b/lib/src/main/java/org/kiwix/libzim/FdInput.java index c8f1c84..f361236 100644 --- a/lib/src/main/java/org/kiwix/libzim/FdInput.java +++ b/lib/src/main/java/org/kiwix/libzim/FdInput.java @@ -26,4 +26,10 @@ public class FdInput public FileDescriptor fd; public long offset; public long size; + + public FdInput(FileDescriptor fd_, long offset_, long size_) { + fd = fd_; + offset = offset_; + size = size_; + } } From 1d49be8b99b6ab328cb45b40439705beecdce760 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 22 Apr 2024 11:05:57 +0200 Subject: [PATCH 3/4] Do not catch (and hide) exception inner exception. --- lib/src/main/cpp/libzim/archive.cpp | 37 +++++++---------------------- 1 file changed, 8 insertions(+), 29 deletions(-) diff --git a/lib/src/main/cpp/libzim/archive.cpp b/lib/src/main/cpp/libzim/archive.cpp index 92e3f92..c3b2fbc 100644 --- a/lib/src/main/cpp/libzim/archive.cpp +++ b/lib/src/main/cpp/libzim/archive.cpp @@ -88,13 +88,8 @@ JNIEXPORT void JNICALL Java_org_kiwix_libzim_Archive_setNativeArchiveByFD( int fd = jni2fd(fdObj, env); LOG("Attempting to create reader with fd: %d", fd); - try { - auto archive = std::make_shared(fd); - SET_PTR(archive); - } catch (std::exception& e) { - LOG("Error opening ZIM file"); - LOG("%s", e.what()); - } + auto archive = std::make_shared(fd); + SET_PTR(archive); #else jclass exception = env->FindClass("java/lang/UnsupportedOperationException"); env->ThrowNew(exception, "org.kiwix.libzim.Archive.setNativeArchiveByFD() is not supported under Windows"); @@ -108,13 +103,8 @@ JNIEXPORT void JNICALL Java_org_kiwix_libzim_Archive_setNativeArchiveEmbedded( int fd = jni2fd(fdObj, env); LOG("Attempting to create reader with fd: %d", fd); - try { - auto archive = std::make_shared(fd, offset, size); - SET_PTR(archive); - } catch (std::exception& e) { - LOG("Error opening ZIM file"); - LOG("%s", e.what()); - } + auto archive = std::make_shared(fd, offset, size); + SET_PTR(archive); #else jclass exception = env->FindClass("java/lang/UnsupportedOperationException"); env->ThrowNew(exception, "org.kiwix.libzim.Archive.setNativeArchiveEmbedded() is not supported under Windows"); @@ -127,14 +117,8 @@ JNIEXPORT void JNICALL Java_org_kiwix_libzim_Archive_setNativeArchiveEmbeddedFd( #ifndef _WIN32 auto fdInput = jni2fdInput(fdObj, env); - LOG("Attempting to create reader with fd: %d", fdInput); - try { - auto archive = std::make_shared(fdInput); - SET_PTR(archive); - } catch (std::exception& e) { - LOG("Error opening ZIM file"); - LOG("%s", e.what()); - } + auto archive = std::make_shared(fdInput); + SET_PTR(archive); #else jclass exception = env->FindClass("java/lang/UnsupportedOperationException"); env->ThrowNew(exception, "org.kiwix.libzim.Archive.setNativeArchiveEmbedded() is not supported under Windows"); @@ -157,13 +141,8 @@ JNIEXPORT void JNICALL Java_org_kiwix_libzim_Archive_setNativeArchiveEmbeddedFds v.push_back(fdInput); } - try { - auto archive = std::make_shared(v); - SET_PTR(archive); - } catch (std::exception& e) { - LOG("Error opening ZIM file"); - LOG("%s", e.what()); - } + auto archive = std::make_shared(v); + SET_PTR(archive); #else jclass exception = env->FindClass("java/lang/UnsupportedOperationException"); env->ThrowNew(exception, "org.kiwix.libzim.Archive.setNativeArchiveEmbedded() is not supported under Windows"); From d2ace702dfd295d48b2aa44577529963cd37363c Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 22 Apr 2024 11:07:23 +0200 Subject: [PATCH 4/4] Update tests. --- .../org/kiwix/test/libzim/TestArchive.java | 12 +++ lib/src/test/test.java | 74 ++++++++++++++++++- 2 files changed, 84 insertions(+), 2 deletions(-) diff --git a/lib/src/test/org/kiwix/test/libzim/TestArchive.java b/lib/src/test/org/kiwix/test/libzim/TestArchive.java index 90ba533..edba6e7 100644 --- a/lib/src/test/org/kiwix/test/libzim/TestArchive.java +++ b/lib/src/test/org/kiwix/test/libzim/TestArchive.java @@ -47,6 +47,18 @@ public TestArchive(FileDescriptor fd, long offset, long size) inner = new Archive(fd, offset, size); } + public TestArchive(FdInput fd) + throws ZimFileFormatException + { + inner = new Archive(fd); + } + + public TestArchive(FdInput[] fds) + throws ZimFileFormatException + { + inner = new Archive(fds); + } + public String getFilename() { return inner.getFilename(); } public long getFilesize() { return inner.getFilesize(); } public int getAllEntryCount() { return inner.getAllEntryCount(); } diff --git a/lib/src/test/test.java b/lib/src/test/test.java index af34fd9..ce41834 100644 --- a/lib/src/test/test.java +++ b/lib/src/test/test.java @@ -83,11 +83,9 @@ private void testArchive(TestArchive archive) assertTrue(Arrays.equals(faviconData, readData)); // Checking all metadata - assertFalse(archive.isMultiPart()); assertTrue(archive.hasNewNamespaceScheme()); assertTrue(archive.hasChecksum()); assertEquals("4a2709fddbee8c27db708c20b4952a06", archive.getChecksum()); - assertTrue(archive.hasTitleIndex()); assertTrue(archive.hasFulltextIndex()); assertTrue(archive.hasMainEntry()); long[] illuSizes = {48}; @@ -221,6 +219,8 @@ public void testArchiveDirect() { TestArchive archive = new TestArchive("small.zim"); testArchive(archive); + assertFalse(archive.isMultiPart()); + assertTrue(archive.hasTitleIndex()); assertTrue(archive.check()); assertEquals("small.zim", archive.getFilename()); } @@ -261,6 +261,26 @@ public void testArchiveByFd() FileInputStream fis = new FileInputStream("small.zim"); TestArchive archive = new TestArchive(fis.getFD()); testArchive(archive); + assertFalse(archive.isMultiPart()); + assertTrue(archive.hasTitleIndex()); + assertTrue(archive.check()); + assertEquals("", archive.getFilename()); + } + System.gc(); + System.runFinalization(); + } + + @Test + public void testArchiveByFdInput() + throws JNIKiwixException, IOException, ZimFileFormatException, EntryNotFoundException { + { + File plainArchive = new File("small.zim"); + FileInputStream fis = new FileInputStream("small.zim"); + FdInput fd = new FdInput(fis.getFD(), 0, plainArchive.length()); + TestArchive archive = new TestArchive(fd); + testArchive(archive); + assertFalse(archive.isMultiPart()); + assertTrue(archive.hasTitleIndex()); assertTrue(archive.check()); assertEquals("", archive.getFilename()); } @@ -278,6 +298,56 @@ public void testArchiveWithAnEmbeddedArchive() // This fails. See https://github.com/openzim/libzim/issues/812 //assertTrue(archive.check()); testArchive(archive); + assertFalse(archive.isMultiPart()); + assertTrue(archive.hasTitleIndex()); + assertEquals("", archive.getFilename()); + } + System.gc(); + System.runFinalization(); + } + + @Test + public void testArchiveWithAnEmbeddedArchiveFdInputNaive() + throws JNIKiwixException, IOException, ZimFileFormatException, EntryNotFoundException { + { + File plainArchive = new File("small.zim"); + FileInputStream fis = new FileInputStream("small.zim.embedded"); + FdInput fd1 = new FdInput(fis.getFD(), 8, plainArchive.length() / 2); + FdInput fd2 = new FdInput(fis.getFD(), fd1.offset + fd1.size, plainArchive.length() - fd1.size); + + FdInput fds[] = {fd1, fd2}; + + TestArchive archive = new TestArchive(fds); + // This fails. See https://github.com/openzim/libzim/issues/812 + //assertTrue(archive.check()); + testArchive(archive); + assertTrue(archive.isMultiPart()); + //Naive split cut the title index in the middle. libzim cannot read it. + assertFalse(archive.hasTitleIndex()); + assertEquals("", archive.getFilename()); + } + System.gc(); + System.runFinalization(); + } + + @Test + public void testArchiveWithAnEmbeddedArchiveFdInput() + throws JNIKiwixException, IOException, ZimFileFormatException, EntryNotFoundException { + { + File plainArchive = new File("small.zim"); + FileInputStream fis = new FileInputStream("small.zim.embedded"); + FdInput fd1 = new FdInput(fis.getFD(), 8, plainArchive.length() / 10); + FdInput fd2 = new FdInput(fis.getFD(), fd1.offset + fd1.size, plainArchive.length() - fd1.size); + + FdInput fds[] = {fd1, fd2}; + + TestArchive archive = new TestArchive(fds); + // This fails. See https://github.com/openzim/libzim/issues/812 + //assertTrue(archive.check()); + testArchive(archive); + assertTrue(archive.isMultiPart()); + //If we don't cut in the middle of xapian db, we can read it. + assertTrue(archive.hasTitleIndex()); assertEquals("", archive.getFilename()); } System.gc();