From 09447041deece0f9b31b4bdafd1a0782ccafa017 Mon Sep 17 00:00:00 2001
From: Will Boulter
Date: Sun, 26 Jul 2020 12:08:44 +0100
Subject: [PATCH 1/3] Fixing modified utf8 issue

Pass addresses to the native layer as UTF-8 encoded byte arrays, and return
expansions / parsed component values as byte arrays that are decoded with
StandardCharsets.UTF_8 on the Java side, instead of going through
GetStringUTFChars / NewStringUTF, which operate on the JVM's modified UTF-8.

The native copy of the address reserves size + 1 bytes so the NUL terminator
stays inside the buffer, the Java side encodes with an explicit UTF-8 charset
rather than the platform default, and the byte[][] result array is created
with a NULL initial element instead of a java.lang.String.
---
 src/main/c/jpostal_AddressExpander.c          | 22 +++++++++++-----
 src/main/c/jpostal_AddressParser.c            | 25 +++++++++++++------
 .../com/mapzen/jpostal/AddressExpander.java   | 11 ++++++++--
 .../com/mapzen/jpostal/AddressParser.java     |  4 +--
 .../com/mapzen/jpostal/ParsedComponent.java   |  7 ++++++
 5 files changed, 52 insertions(+), 17 deletions(-)

diff --git a/src/main/c/jpostal_AddressExpander.c b/src/main/c/jpostal_AddressExpander.c
index 8e052c7..1cd43fd 100644
--- a/src/main/c/jpostal_AddressExpander.c
+++ b/src/main/c/jpostal_AddressExpander.c
@@ -26,8 +26,17 @@ JNIEXPORT void JNICALL Java_com_mapzen_jpostal_AddressExpander_setupDataDir
 }
 
 JNIEXPORT jobjectArray JNICALL Java_com_mapzen_jpostal_AddressExpander_libpostalExpand
-  (JNIEnv *env, jclass cls, jstring jAddress, jobject jOptions) {
-    const char *address = (*env)->GetStringUTFChars(env, jAddress, 0);
+  (JNIEnv *env, jclass cls, jbyteArray jAddress, jobject jOptions) {
+    jbyte* addressElements = (*env)->GetByteArrayElements(env, jAddress, NULL);
+    jsize size = (*env)->GetArrayLength(env, jAddress);
+    char address[size + 1]; /* +1: room for the NUL terminator written below */
+
+    for (jsize z = 0; z < size; z++) {
+        address[z] = addressElements[z];
+    }
+    (*env)->ReleaseByteArrayElements(env, jAddress, addressElements, JNI_ABORT); /* read-only: nothing to copy back */
+
+    address[size] = '\0';
 
     size_t num_expansions = 0;
     libpostal_normalize_options_t options = libpostal_get_default_options();
@@ -210,16 +219,17 @@ JNIEXPORT jobjectArray JNICALL Java_com_mapzen_jpostal_AddressExpander_libpostal
 
     char **expansions = libpostal_expand_address((char *)address, options, &num_expansions);
 
-    (*env)->ReleaseStringUTFChars(env, jAddress, address);
 
 
     jobjectArray ret = (jobjectArray)(*env)->NewObjectArray(env, num_expansions,
-                                                            (*env)->FindClass(env, "java/lang/String"),
-                                                            (*env)->NewStringUTF(env, ""));
+                                                            (*env)->FindClass(env, "[B"),
+                                                            NULL); /* every slot is filled in the loop below */
 
     if (num_expansions > 0) {
         for (size_t i = 0; i < num_expansions; i++) {
-            (*env)->SetObjectArrayElement(env, ret, i, (*env)->NewStringUTF(env, expansions[i]));
+            jbyteArray bytes = (*env)->NewByteArray(env,strlen(expansions[i]));
+            (*env)->SetByteArrayRegion(env, bytes, 0, strlen(expansions[i]), (jbyte*) expansions[i]);
+            (*env)->SetObjectArrayElement(env, ret, i, bytes);
         }
     }
 
diff --git a/src/main/c/jpostal_AddressParser.c b/src/main/c/jpostal_AddressParser.c
index 8ecb7bc..cb9cd3d 100644
--- a/src/main/c/jpostal_AddressParser.c
+++ b/src/main/c/jpostal_AddressParser.c
@@ -1,5 +1,6 @@
 #include <jni.h>
 #include <libpostal/libpostal.h>
+#include <string.h>
 
 JNIEXPORT void JNICALL Java_com_mapzen_jpostal_AddressParser_setup
   (JNIEnv *env, jclass cls) {
@@ -24,9 +25,19 @@ JNIEXPORT void JNICALL Java_com_mapzen_jpostal_AddressParser_setupDataDir
 }
 
 JNIEXPORT jobjectArray JNICALL Java_com_mapzen_jpostal_AddressParser_libpostalParse
-  (JNIEnv *env, jobject thisObj, jstring jAddress, jobject jOptions) {
+  (JNIEnv *env, jobject thisObj, jbyteArray jAddress, jobject jOptions) {
+
+    jbyte* addressElements = (*env)->GetByteArrayElements(env, jAddress, NULL);
+    jsize size = (*env)->GetArrayLength(env, jAddress);
+    char address[size + 1]; /* +1: room for the NUL terminator written below */
+
+    for (int i = 0; i < size; ++i) {
+        address[i] = addressElements[i];
+    }
+    (*env)->ReleaseByteArrayElements(env, jAddress, addressElements, JNI_ABORT); /* read-only: nothing to copy back */
+
+    address[size] = '\0';
 
-    const char *address = (*env)->GetStringUTFChars(env, jAddress, 0);
     libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
 
 
@@ -58,8 +69,6 @@ JNIEXPORT jobjectArray JNICALL Java_com_mapzen_jpostal_AddressParser_libpostalPa
 
     libpostal_address_parser_response_t *response = libpostal_parse_address((char *)address, options);
 
-    (*env)->ReleaseStringUTFChars(env, jAddress, address);
-
     if (jLanguage != NULL) {
         (*env)->ReleaseStringUTFChars(env, jLanguage, 0);
     }
@@ -71,7 +80,7 @@ JNIEXPORT jobjectArray JNICALL Java_com_mapzen_jpostal_AddressParser_libpostalPa
 
     jmethodID mid;
     jclass parsedComponentClass = (*env)->FindClass(env, "com/mapzen/jpostal/ParsedComponent");
-    mid = (*env)->GetMethodID(env, parsedComponentClass, "<init>", "(Ljava/lang/String;Ljava/lang/String;)V");
+    mid = (*env)->GetMethodID(env, parsedComponentClass, "<init>", "([BLjava/lang/String;)V");
 
     size_t num_components = response != NULL ? response->num_components : 0;
 
@@ -82,10 +91,12 @@ JNIEXPORT jobjectArray JNICALL Java_com_mapzen_jpostal_AddressParser_libpostalPa
 
     if (num_components > 0) {
         for (size_t i = 0; i < num_components; i++) {
-            jstring jComponent = (*env)->NewStringUTF(env, response->components[i]);
             jstring jLabel = (*env)->NewStringUTF(env, response->labels[i]);
 
-            jobject jParsedComponent = (*env)->NewObject(env, parsedComponentClass, mid, jComponent, jLabel);
+            jbyteArray bytes = (*env)->NewByteArray(env,strlen(response->components[i]));
+            (*env)->SetByteArrayRegion(env, bytes, 0, strlen(response->components[i]), (jbyte*) response->components[i]);
+
+            jobject jParsedComponent = (*env)->NewObject(env, parsedComponentClass, mid, bytes, jLabel);
             (*env)->SetObjectArrayElement(env, ret, i, jParsedComponent);
         }
 
diff --git a/src/main/java/com/mapzen/jpostal/AddressExpander.java b/src/main/java/com/mapzen/jpostal/AddressExpander.java
index cc46099..3a569e6 100644
--- a/src/main/java/com/mapzen/jpostal/AddressExpander.java
+++ b/src/main/java/com/mapzen/jpostal/AddressExpander.java
@@ -2,6 +2,8 @@
 
 import com.mapzen.jpostal.ExpanderOptions;
 
+import java.nio.charset.StandardCharsets;
+
 public class AddressExpander {
     static {
         System.loadLibrary("jpostal_expander"); // Load native library at runtime
@@ -27,7 +29,7 @@ public static AddressExpander getInstance() {
 
     static native synchronized void setup();
     static native synchronized void setupDataDir(String dataDir);
-    private static native synchronized String[] libpostalExpand(String address, ExpanderOptions options);
+    private static native synchronized byte[][] libpostalExpand(byte[] address, ExpanderOptions options);
     static native synchronized void teardown();
 
     public String[] expandAddress(String address) {
@@ -43,7 +45,12 @@ public String[] expandAddressWithOptions(String address, ExpanderOptions options
         }
 
         synchronized(this) {
-            return libpostalExpand(address, options);
+            byte[][] expansionBytes = libpostalExpand(address.getBytes(StandardCharsets.UTF_8), options); // explicit charset: getBytes() alone uses the platform default
+            String[] expansions = new String[expansionBytes.length];
+            for (int i = 0; i < expansionBytes.length; i++) {
+                expansions[i] = new String(expansionBytes[i], StandardCharsets.UTF_8);
+            }
+            return expansions;
         }
     }
 
diff --git a/src/main/java/com/mapzen/jpostal/AddressParser.java b/src/main/java/com/mapzen/jpostal/AddressParser.java
index fddc3f0..6ec633a 100644
--- a/src/main/java/com/mapzen/jpostal/AddressParser.java
+++ b/src/main/java/com/mapzen/jpostal/AddressParser.java
@@ -10,7 +10,7 @@ public class AddressParser {
 
     static native synchronized void setup();
     static native synchronized void setupDataDir(String dataDir);
-    private native synchronized ParsedComponent[] libpostalParse(String address, ParserOptions options);
+    private native synchronized ParsedComponent[] libpostalParse(byte[] address, ParserOptions options);
     static native synchronized void teardown();
 
     private volatile static AddressParser instance = null;
@@ -43,7 +43,7 @@ public ParsedComponent[] parseAddressWithOptions(String address, ParserOptions o
         }
 
         synchronized(this) {
-            return libpostalParse(address, options);
+            return libpostalParse(address.getBytes(java.nio.charset.StandardCharsets.UTF_8), options); // explicit charset: getBytes() alone uses the platform default
         }
     }
 
diff --git a/src/main/java/com/mapzen/jpostal/ParsedComponent.java b/src/main/java/com/mapzen/jpostal/ParsedComponent.java
index cae4f45..2440eb3 100644
--- a/src/main/java/com/mapzen/jpostal/ParsedComponent.java
+++ b/src/main/java/com/mapzen/jpostal/ParsedComponent.java
@@ -1,5 +1,7 @@
 package com.mapzen.jpostal;
 
+import java.nio.charset.StandardCharsets;
+
 public class ParsedComponent {
     private String value;
     private String label;
@@ -24,4 +26,9 @@ public ParsedComponent(String value, String label) {
         this.value = value;
         this.label = label;
     }
+
+    public ParsedComponent(byte[] value, String label) {
+        this.value = new String(value, StandardCharsets.UTF_8);
+        this.label = label;
+    }
 }
\ No newline at end of file

From 0528780173103522d25bacc619ee43b345b2b39b Mon Sep 17 00:00:00 2001
From: Will Boulter
Date: Sun, 26 Jul 2020 15:21:32 +0100
Subject: [PATCH 2/3] Adding tests for NUL and 4 byte issue

---
 .../mapzen/jpostal/TestAddressExpander.java   | 13 +++++++++
 .../com/mapzen/jpostal/TestAddressParser.java | 27 +++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/src/test/java/com/mapzen/jpostal/TestAddressExpander.java b/src/test/java/com/mapzen/jpostal/TestAddressExpander.java
index 6fc944c..ba499b2 100644
--- a/src/test/java/com/mapzen/jpostal/TestAddressExpander.java
+++ b/src/test/java/com/mapzen/jpostal/TestAddressExpander.java
@@ -60,5 +60,18 @@ public void testEnglishExpansions() {
 
         assertTrue(containsExpansionWithOptions("30 West Twenty-sixth St Fl No. 7", "30 west 26th street floor number 7", englishOptions));
     }
+    @Test
+    public void testNulTerminatedExpansion() {
+        assertTrue(containsExpansion("123 Main St\u0000", "123 main street"));
+    }
+    @Test
+    public void testAltNulTerminatedExpansion() {
+        assertTrue(containsExpansion("123 Main St\0", "123 main street"));
+    }
+
+    @Test
+    public void test4ByteCharacterExpansion() {
+        assertTrue(containsExpansion("123 Main St, 𠜎𠜱𠝹𠱓, 😀🤠", "123 main street 𠜎𠜱𠝹𠱓 😀🤠"));
+    }
 
 }
diff --git a/src/test/java/com/mapzen/jpostal/TestAddressParser.java b/src/test/java/com/mapzen/jpostal/TestAddressParser.java
index 9c7dcb6..daca3d8 100644
--- a/src/test/java/com/mapzen/jpostal/TestAddressParser.java
+++ b/src/test/java/com/mapzen/jpostal/TestAddressParser.java
@@ -56,4 +56,31 @@ public void testParseUSAddress() {
                 new ParsedComponent("usa", "country")
         );
     }
+
+    @Test
+    public void testParseNulTerminatedAddress() {
+        testParse("Rue du Médecin-Colonel Calbairac Toulouse France\u0000",
+                new ParsedComponent("rue du médecin-colonel calbairac", "road"),
+                new ParsedComponent("toulouse", "city"),
+                new ParsedComponent("france", "country")
+        );
+    }
+
+    @Test
+    public void testParseAltNulTerminatedAddress() {
+        testParse("Rue du Médecin-Colonel Calbairac Toulouse France\0",
+                new ParsedComponent("rue du médecin-colonel calbairac", "road"),
+                new ParsedComponent("toulouse", "city"),
+                new ParsedComponent("france", "country")
+        );
+    }
+
+    @Test
+    public void testParse4ByteCharacterAddress() {
+        testParse("𠜎𠜱𠝹𠱓, 😀🤠, London, UK",
+                new ParsedComponent("𠜎𠜱𠝹𠱓 😀🤠", "house"),
+                new ParsedComponent("london", "city"),
+                new ParsedComponent("uk", "country")
+        );
+    }
 }
\ No newline at end of file

From 238bed7409c8bc49ec2ae613f5ae9e7694194e73 Mon Sep 17 00:00:00 2001
From: Will Boulter
Date: Tue, 28 Jul 2020 13:21:21 +0100
Subject: [PATCH 3/3] Trying to fix TLS issue

---
 gradle.properties | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 gradle.properties

diff --git a/gradle.properties b/gradle.properties
new file mode 100644
index 0000000..8e19841
--- /dev/null
+++ b/gradle.properties
@@ -0,0 +1 @@
+systemProp.https.protocols=TLSv1.2