Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix modified utf-8 issues #38

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions gradle.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
systemProp.https.protocols=TLSv1.2
20 changes: 15 additions & 5 deletions src/main/c/jpostal_AddressExpander.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,17 @@ JNIEXPORT void JNICALL Java_com_mapzen_jpostal_AddressExpander_setupDataDir
}

JNIEXPORT jobjectArray JNICALL Java_com_mapzen_jpostal_AddressExpander_libpostalExpand
(JNIEnv *env, jclass cls, jstring jAddress, jobject jOptions) {
const char *address = (*env)->GetStringUTFChars(env, jAddress, 0);
(JNIEnv *env, jclass cls, jbyteArray jAddress, jobject jOptions) {
/* Copy the caller's bytes into a NUL-terminated C string for libpostal.
 * The buffer holds size + 1 bytes: the array contents plus the terminator.
 * (Sizing it as address[size] and then writing address[size] overran the
 * buffer by one byte.) */
jsize size = (*env)->GetArrayLength(env, jAddress);
char address[size + 1];

/* GetByteArrayRegion copies directly into our buffer, so no element
 * pointer has to be obtained and released, and nothing is copied back
 * into the Java array. */
(*env)->GetByteArrayRegion(env, jAddress, 0, size, (jbyte *)address);
address[size] = '\0';

size_t num_expansions = 0;
libpostal_normalize_options_t options = libpostal_get_default_options();
Expand Down Expand Up @@ -210,16 +219,17 @@ JNIEXPORT jobjectArray JNICALL Java_com_mapzen_jpostal_AddressExpander_libpostal

char **expansions = libpostal_expand_address((char *)address, options, &num_expansions);

(*env)->ReleaseStringUTFChars(env, jAddress, address);

jobjectArray ret = (jobjectArray)(*env)->NewObjectArray(env,
num_expansions,
(*env)->FindClass(env, "java/lang/String"),
(*env)->FindClass(env, "[B"),
(*env)->NewStringUTF(env, ""));

if (num_expansions > 0) {
for (size_t i = 0; i < num_expansions; i++) {
(*env)->SetObjectArrayElement(env, ret, i, (*env)->NewStringUTF(env, expansions[i]));
jbyteArray bytes = (*env)->NewByteArray(env,strlen(expansions[i]));
(*env)->SetByteArrayRegion(env, bytes, 0, strlen(expansions[i]), (jbyte*) expansions[i]);
(*env)->SetObjectArrayElement(env, ret, i, bytes);
}

}
Expand Down
25 changes: 18 additions & 7 deletions src/main/c/jpostal_AddressParser.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <jni.h>
#include <libpostal/libpostal.h>
#include <string.h>

JNIEXPORT void JNICALL Java_com_mapzen_jpostal_AddressParser_setup
(JNIEnv *env, jclass cls) {
Expand All @@ -24,9 +25,19 @@ JNIEXPORT void JNICALL Java_com_mapzen_jpostal_AddressParser_setupDataDir
}

JNIEXPORT jobjectArray JNICALL Java_com_mapzen_jpostal_AddressParser_libpostalParse
(JNIEnv *env, jobject thisObj, jstring jAddress, jobject jOptions) {
(JNIEnv *env, jobject thisObj, jbyteArray jAddress, jobject jOptions) {

/* Copy the caller's bytes into a NUL-terminated C string for libpostal.
 * The buffer holds size + 1 bytes: the array contents plus the terminator.
 * (Sizing it as address[size] and then writing address[size] overran the
 * buffer by one byte.) */
jsize size = (*env)->GetArrayLength(env, jAddress);
char address[size + 1];

/* GetByteArrayRegion copies directly into our buffer, so no element
 * pointer has to be obtained and released, and nothing is copied back
 * into the Java array. */
(*env)->GetByteArrayRegion(env, jAddress, 0, size, (jbyte *)address);
address[size] = '\0';

const char *address = (*env)->GetStringUTFChars(env, jAddress, 0);

libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();

Expand Down Expand Up @@ -58,8 +69,6 @@ JNIEXPORT jobjectArray JNICALL Java_com_mapzen_jpostal_AddressParser_libpostalPa

libpostal_address_parser_response_t *response = libpostal_parse_address((char *)address, options);

(*env)->ReleaseStringUTFChars(env, jAddress, address);

if (jLanguage != NULL) {
(*env)->ReleaseStringUTFChars(env, jLanguage, 0);
}
Expand All @@ -71,7 +80,7 @@ JNIEXPORT jobjectArray JNICALL Java_com_mapzen_jpostal_AddressParser_libpostalPa
jmethodID mid;

jclass parsedComponentClass = (*env)->FindClass(env, "com/mapzen/jpostal/ParsedComponent");
mid = (*env)->GetMethodID(env, parsedComponentClass, "<init>", "(Ljava/lang/String;Ljava/lang/String;)V");
mid = (*env)->GetMethodID(env, parsedComponentClass, "<init>", "([BLjava/lang/String;)V");

size_t num_components = response != NULL ? response->num_components : 0;

Expand All @@ -82,10 +91,12 @@ JNIEXPORT jobjectArray JNICALL Java_com_mapzen_jpostal_AddressParser_libpostalPa

if (num_components > 0) {
for (size_t i = 0; i < num_components; i++) {
jstring jComponent = (*env)->NewStringUTF(env, response->components[i]);
jstring jLabel = (*env)->NewStringUTF(env, response->labels[i]);

jobject jParsedComponent = (*env)->NewObject(env, parsedComponentClass, mid, jComponent, jLabel);
jbyteArray bytes = (*env)->NewByteArray(env,strlen(response->components[i]));
(*env)->SetByteArrayRegion(env, bytes, 0, strlen(response->components[i]), (jbyte*) response->components[i]);

jobject jParsedComponent = (*env)->NewObject(env, parsedComponentClass, mid, bytes, jLabel);

(*env)->SetObjectArrayElement(env, ret, i, jParsedComponent);
}
Expand Down
11 changes: 9 additions & 2 deletions src/main/java/com/mapzen/jpostal/AddressExpander.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import com.mapzen.jpostal.ExpanderOptions;

import java.nio.charset.StandardCharsets;

public class AddressExpander {
static {
System.loadLibrary("jpostal_expander"); // Load native library at runtime
Expand All @@ -27,7 +29,7 @@ public static AddressExpander getInstance() {

static native synchronized void setup();
static native synchronized void setupDataDir(String dataDir);
private static native synchronized String[] libpostalExpand(String address, ExpanderOptions options);
private static native synchronized byte[][] libpostalExpand(byte[] address, ExpanderOptions options);
static native synchronized void teardown();

public String[] expandAddress(String address) {
Expand All @@ -43,7 +45,12 @@ public String[] expandAddressWithOptions(String address, ExpanderOptions options
}

synchronized(this) {
return libpostalExpand(address, options);
// Encode explicitly as UTF-8: the no-argument getBytes() uses the
// platform default charset, which would not match the UTF-8 decoding
// applied to the results below.
byte[][] expansionBytes = libpostalExpand(address.getBytes(StandardCharsets.UTF_8), options);
String[] expansions = new String[expansionBytes.length];
for (int i = 0; i < expansionBytes.length; i++) {
    expansions[i] = new String(expansionBytes[i], StandardCharsets.UTF_8);
}
return expansions;
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/mapzen/jpostal/AddressParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ public class AddressParser {

static native synchronized void setup();
static native synchronized void setupDataDir(String dataDir);
private native synchronized ParsedComponent[] libpostalParse(String address, ParserOptions options);
private native synchronized ParsedComponent[] libpostalParse(byte[] address, ParserOptions options);
static native synchronized void teardown();

private volatile static AddressParser instance = null;
Expand Down Expand Up @@ -43,7 +43,7 @@ public ParsedComponent[] parseAddressWithOptions(String address, ParserOptions o
}

synchronized(this) {
return libpostalParse(address, options);
// Encode explicitly as UTF-8 rather than the platform default charset, so the
// bytes match the UTF-8 decoding done in ParsedComponent(byte[], String).
return libpostalParse(address.getBytes(java.nio.charset.StandardCharsets.UTF_8), options);
}
}

Expand Down
7 changes: 7 additions & 0 deletions src/main/java/com/mapzen/jpostal/ParsedComponent.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package com.mapzen.jpostal;

import java.nio.charset.StandardCharsets;

public class ParsedComponent {
private String value;
private String label;
Expand All @@ -24,4 +26,9 @@ public ParsedComponent(String value, String label) {
this.value = value;
this.label = label;
}

public ParsedComponent(byte[] value, String label) {
this.value = new String(value, StandardCharsets.UTF_8);
this.label = label;
}
}
13 changes: 13 additions & 0 deletions src/test/java/com/mapzen/jpostal/TestAddressExpander.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,5 +60,18 @@ public void testEnglishExpansions() {
assertTrue(containsExpansionWithOptions("30 West Twenty-sixth St Fl No. 7", "30 west 26th street floor number 7", englishOptions));
}

@Test
public void testNulTerminatedExpansion() {
    // The input ends with a NUL character, written as a Unicode escape.
    final String input = "123 Main St\u0000";
    final String expected = "123 main street";
    assertTrue(containsExpansion(input, expected));
}

@Test
public void testAltNulTerminatedExpansion() {
    // The input ends with a NUL character, written as an octal escape.
    final String input = "123 Main St\0";
    final String expected = "123 main street";
    assertTrue(containsExpansion(input, expected));
}

@Test
public void test4ByteCharacterExpansion() {
    // The input contains characters outside the Basic Multilingual Plane,
    // which take four bytes each in UTF-8.
    final String input = "123 Main St, 𠜎𠜱𠝹𠱓, 😀🤠";
    final String expected = "123 main street 𠜎𠜱𠝹𠱓 😀🤠";
    assertTrue(containsExpansion(input, expected));
}
}
27 changes: 27 additions & 0 deletions src/test/java/com/mapzen/jpostal/TestAddressParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,31 @@ public void testParseUSAddress() {
new ParsedComponent("usa", "country")
);
}

@Test
public void testParseNulTerminatedAddress() {
    // The input ends with a NUL character, written as a Unicode escape.
    final String input = "Rue du Médecin-Colonel Calbairac Toulouse France\u0000";
    final ParsedComponent road = new ParsedComponent("rue du médecin-colonel calbairac", "road");
    final ParsedComponent city = new ParsedComponent("toulouse", "city");
    final ParsedComponent country = new ParsedComponent("france", "country");
    testParse(input, road, city, country);
}

@Test
public void testParseAltNulTerminatedAddress() {
    // The input ends with a NUL character, written as an octal escape.
    final String input = "Rue du Médecin-Colonel Calbairac Toulouse France\0";
    final ParsedComponent road = new ParsedComponent("rue du médecin-colonel calbairac", "road");
    final ParsedComponent city = new ParsedComponent("toulouse", "city");
    final ParsedComponent country = new ParsedComponent("france", "country");
    testParse(input, road, city, country);
}

@Test
public void testParse4ByteCharacterAddress() {
    // The input contains characters outside the Basic Multilingual Plane,
    // which take four bytes each in UTF-8.
    final String input = "𠜎𠜱𠝹𠱓, 😀🤠, London, UK";
    final ParsedComponent house = new ParsedComponent("𠜎𠜱𠝹𠱓 😀🤠", "house");
    final ParsedComponent city = new ParsedComponent("london", "city");
    final ParsedComponent country = new ParsedComponent("uk", "country");
    testParse(input, house, city, country);
}
}