From 96f126dd59ff8f3b12e57b96539ad2262b9360a2 Mon Sep 17 00:00:00 2001 From: Erica Fischer Date: Tue, 13 Feb 2024 14:18:30 -0800 Subject: [PATCH] FSL-style expressions can use unidecode data to smash case and diacritics (#197) * Read unidecode data, do some plumbing of it * More unidecode plumbing * Do the unidecode smashing, but it doesn't seem to be working * Ah, that's better! * Add missing header * And reorder the includes too * Shortcut when there is no unidecode data to work with * Update version and changelog * Avoid repeated unidecode smashing of the same constant string --- CHANGELOG.md | 4 +++ clip.cpp | 10 +++--- evaluator.cpp | 80 +++++++++++++++++++++++++++++++++------------ evaluator.hpp | 4 +-- geometry.hpp | 6 ++-- jsonpull/jsonpull.c | 1 + jsonpull/jsonpull.h | 1 + main.cpp | 6 +++- overzoom.cpp | 9 ++++- text.cpp | 74 ++++++++++++++++++++++++++++++++++++++++- text.hpp | 2 ++ tile-join.cpp | 8 +++-- tile.cpp | 18 ++++++---- tile.hpp | 2 +- version.hpp | 2 +- 15 files changed, 183 insertions(+), 44 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5666c996e..e39dc8066 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 2.44.0 + +* Add --unidecode-data option to allow case-insensitive filter comparisons of transliterated strings + # 2.43.0 * Change -fraction-as-needed feature dropping to be consistent across tiles and zoom levels, and to follow the same pattern as point dropping by zoom level diff --git a/clip.cpp b/clip.cpp index ee8bb9e8f..4972afa24 100644 --- a/clip.cpp +++ b/clip.cpp @@ -758,7 +758,7 @@ static std::vector> clip_poly1(std::vector const &keep, bool do_compress, std::vector> *next_overzoomed_tiles, - bool demultiply, json_object *filter, bool preserve_input_order, std::unordered_map const &attribute_accum) { + bool demultiply, json_object *filter, bool preserve_input_order, std::unordered_map const &attribute_accum, std::vector const &unidecode_data) { mvt_tile tile; try { @@ -772,7 +772,7 @@ std::string overzoom(const std::string &s, int oz, int ox, int oy, int nz, int n exit(EXIT_PROTOBUF); } - return overzoom(tile, oz, ox, oy, nz, nx, ny, detail, buffer, keep, do_compress, next_overzoomed_tiles, demultiply, filter, preserve_input_order, attribute_accum); + return overzoom(tile, oz, ox, oy, nz, nx, ny, detail, buffer, keep, do_compress, next_overzoomed_tiles, demultiply, filter, preserve_input_order, attribute_accum, unidecode_data); } struct tile_feature { @@ -873,7 +873,7 @@ static struct preservecmp { std::string overzoom(const mvt_tile &tile, int oz, int ox, int oy, int nz, int nx, int ny, int detail, int buffer, std::set const &keep, bool do_compress, std::vector> *next_overzoomed_tiles, - bool demultiply, json_object *filter, bool preserve_input_order, std::unordered_map const &attribute_accum) { + bool demultiply, json_object *filter, bool preserve_input_order, std::unordered_map const &attribute_accum, std::vector const &unidecode_data) { mvt_tile outtile; std::shared_ptr tile_stringpool = std::make_shared(); @@ -924,7 +924,7 @@ std::string overzoom(const mvt_tile &tile, int oz, int ox, int oy, int nz, int n } std::set exclude_attributes; - if (filter != NULL && !evaluate(feature, layer, filter, exclude_attributes, nz)) { + if (filter != NULL && !evaluate(feature, layer, filter, exclude_attributes, nz, unidecode_data)) { continue; } @@ -1048,7 +1048,7 @@ std::string overzoom(const mvt_tile &tile, int oz, int ox, int oy, int nz, int n std::string child = overzoom(outtile, nz, nx, ny, nz + 1, nx * 2 + x, ny * 2 + y, detail, 
buffer, keep, false, NULL, - demultiply, filter, preserve_input_order, attribute_accum); + demultiply, filter, preserve_input_order, attribute_accum, unidecode_data); if (child.size() > 0) { next_overzoomed_tiles->emplace_back(nx * 2 + x, ny * 2 + y); } diff --git a/evaluator.cpp b/evaluator.cpp index 7508f46cb..a027ef3aa 100644 --- a/evaluator.cpp +++ b/evaluator.cpp @@ -7,11 +7,16 @@ #include "evaluator.hpp" #include "errors.hpp" #include "milo/dtoa_milo.h" +#include "text.hpp" -static std::string mvt_value_to_string(mvt_value const &one, bool &fail) { +static std::string mvt_value_to_string(mvt_value const &one, bool &fail, std::vector const &unidecode_data) { switch (one.type) { case mvt_string: - return one.get_string_value(); + if (unidecode_data.size() > 0) { + return unidecode_smash(unidecode_data, one.c_str()); + } else { + return one.get_string_value(); + } case mvt_float: return milo::dtoa_milo(one.numeric_value.float_value); case mvt_double: @@ -34,7 +39,27 @@ static std::string mvt_value_to_string(mvt_value const &one, bool &fail) { } } -int compare_fsl(mvt_value const &one, json_object *two, bool &fail) { +// Alter the JSON parse tree in place to replace the original match string +// with its unidecode-smashed version. +// +// To avoid repeated re-smashings of the same JSON object, objects that have +// already been smashed are marked by setting their refcon to the unidecode data. +static void smash(std::vector const &unidecode_data, json_object *j) { + if (j->value.string.refcon == (void *) &unidecode_data) { + return; + } + + std::string s = unidecode_smash(unidecode_data, j->value.string.string); + j->value.string.string = (char *) realloc(j->value.string.string, s.size() + 1); + if (j->value.string.string == NULL) { + perror("realloc for unidecode_smash"); + exit(EXIT_MEMORY); + } + strcpy(j->value.string.string, s.c_str()); + j->value.string.refcon = (void *) &unidecode_data; +} + +int compare_fsl(mvt_value const &one, json_object *two, bool &fail, std::vector const &unidecode_data) { // In FSL expressions, the attribute value is coerced to the type // of the JSON literal value it is being compared to. 
// @@ -96,7 +121,11 @@ int compare_fsl(mvt_value const &one, json_object *two, bool &fail) { } if (two->type == JSON_STRING) { - std::string lhs = mvt_value_to_string(one, fail); + std::string lhs = mvt_value_to_string(one, fail, unidecode_data); + + if (unidecode_data.size() > 0) { + smash(unidecode_data, two); + } return strcmp(lhs.c_str(), two->value.string.string); } @@ -226,7 +255,7 @@ int compare(mvt_value const &one, json_object *two, bool &fail) { // 0: false // 1: true // -1: incomparable (sql null), treated as false in final output -static int eval(std::function feature, json_object *f, std::set &exclude_attributes) { +static int eval(std::function feature, json_object *f, std::set &exclude_attributes, std::vector const &unidecode_data) { if (f != NULL) { if (f->type == JSON_TRUE) { return 1; @@ -283,10 +312,10 @@ static int eval(std::function feature, json_obje lhs = -1; // not found: null } } else { - lhs = eval(feature, f->value.array.array[0], exclude_attributes); + lhs = eval(feature, f->value.array.array[0], exclude_attributes, unidecode_data); } - int rhs = eval(feature, f->value.array.array[2], exclude_attributes); + int rhs = eval(feature, f->value.array.array[2], exclude_attributes, unidecode_data); if (lhs < 0 && rhs < 0) { return -1; // null op null => null } @@ -337,12 +366,17 @@ static int eval(std::function feature, json_obje if (f->value.array.array[2]->type == JSON_STRING && (strcmp(f->value.array.array[1]->value.string.string, "cn") == 0 || strcmp(f->value.array.array[1]->value.string.string, "nc") == 0)) { - std::string s = mvt_value_to_string(lhs, fail); + std::string s = mvt_value_to_string(lhs, fail, unidecode_data); if (fail) { return -1; // null cn anything => false } - bool contains = strstr(s.c_str(), f->value.array.array[2]->value.string.string); + bool contains; + if (unidecode_data.size() > 0) { + smash(unidecode_data, f->value.array.array[2]); + } + contains = strstr(s.c_str(), f->value.array.array[2]->value.string.string); + if (strcmp(f->value.array.array[1]->value.string.string, "cn") == 0) { return contains; } else { @@ -353,7 +387,7 @@ static int eval(std::function feature, json_obje if (f->value.array.array[2]->type == JSON_ARRAY && (strcmp(f->value.array.array[1]->value.string.string, "in") == 0 || strcmp(f->value.array.array[1]->value.string.string, "ni") == 0)) { - std::string s = mvt_value_to_string(lhs, fail); + std::string s = mvt_value_to_string(lhs, fail, unidecode_data); if (fail) { return -1; // null in anything => false } @@ -364,7 +398,11 @@ static int eval(std::function feature, json_obje return -1; // anything in [not-a-string] => null } - if (s == f->value.array.array[2]->value.array.array[i]->value.string.string) { + if (unidecode_data.size() > 0) { + smash(unidecode_data, f->value.array.array[2]->value.array.array[i]); + } + + if (strcmp(s.c_str(), f->value.array.array[2]->value.array.array[i]->value.string.string) == 0) { contains = true; break; } @@ -377,7 +415,7 @@ static int eval(std::function feature, json_obje } } - int cmp = compare_fsl(ff, f->value.array.array[2], fail); + int cmp = compare_fsl(ff, f->value.array.array[2], fail, unidecode_data); if (fail) { return -1; // null } @@ -516,7 +554,7 @@ static int eval(std::function feature, json_obje } for (size_t i = 1; i < f->value.array.length; i++) { - int out = eval(feature, f->value.array.array[i], exclude_attributes); + int out = eval(feature, f->value.array.array[i], exclude_attributes, unidecode_data); if (out >= 0) { // nulls are ignored in boolean and/or 
expressions if (strcmp(f->value.array.array[0]->value.string.string, "all") == 0) { @@ -607,7 +645,7 @@ static int eval(std::function feature, json_obje exit(EXIT_FILTER); } - bool ok = eval(feature, f->value.array.array[2], exclude_attributes) > 0; + bool ok = eval(feature, f->value.array.array[2], exclude_attributes, unidecode_data) > 0; if (!ok) { exclude_attributes.insert(f->value.array.array[1]->value.string.string); } @@ -619,7 +657,7 @@ static int eval(std::function feature, json_obje exit(EXIT_FILTER); } -bool evaluate(std::function feature, std::string const &layer, json_object *filter, std::set &exclude_attributes) { +bool evaluate(std::function feature, std::string const &layer, json_object *filter, std::set &exclude_attributes, std::vector const &unidecode_data) { if (filter == NULL || filter->type != JSON_HASH) { fprintf(stderr, "Error: filter is not a hash: %s\n", json_stringify(filter)); exit(EXIT_JSON); @@ -630,12 +668,12 @@ bool evaluate(std::function feature, std::string f = json_hash_get(filter, layer.c_str()); if (ok && f != NULL) { - ok = eval(feature, f, exclude_attributes) > 0; + ok = eval(feature, f, exclude_attributes, unidecode_data) > 0; } f = json_hash_get(filter, "*"); if (ok && f != NULL) { - ok = eval(feature, f, exclude_attributes) > 0; + ok = eval(feature, f, exclude_attributes, unidecode_data) > 0; } return ok; @@ -673,7 +711,7 @@ json_object *parse_filter(const char *s) { return filter; } -bool evaluate(std::unordered_map const &feature, std::string const &layer, json_object *filter, std::set &exclude_attributes) { +bool evaluate(std::unordered_map const &feature, std::string const &layer, json_object *filter, std::set &exclude_attributes, std::vector const &unidecode_data) { std::function getter = [&](std::string const &key) { auto f = feature.find(key); if (f != feature.end()) { @@ -686,10 +724,10 @@ bool evaluate(std::unordered_map const &feature, std::st } }; - return evaluate(getter, layer, filter, exclude_attributes); + return evaluate(getter, layer, filter, exclude_attributes, unidecode_data); } -bool evaluate(mvt_feature const &feat, mvt_layer const &layer, json_object *filter, std::set &exclude_attributes, int z) { +bool evaluate(mvt_feature const &feat, mvt_layer const &layer, json_object *filter, std::set &exclude_attributes, int z, std::vector const &unidecode_data) { std::function getter = [&](std::string const &key) { const static std::string dollar_id = "$id"; if (key == dollar_id && feat.has_id) { @@ -737,5 +775,5 @@ bool evaluate(mvt_feature const &feat, mvt_layer const &layer, json_object *filt return v; }; - return evaluate(getter, layer.name, filter, exclude_attributes); + return evaluate(getter, layer.name, filter, exclude_attributes, unidecode_data); } diff --git a/evaluator.hpp b/evaluator.hpp index b64fb0955..bc6be00ce 100644 --- a/evaluator.hpp +++ b/evaluator.hpp @@ -7,10 +7,10 @@ #include "jsonpull/jsonpull.h" #include "mvt.hpp" -bool evaluate(std::unordered_map const &feature, std::string const &layer, json_object *filter, std::set &exclude_attributes); +bool evaluate(std::unordered_map const &feature, std::string const &layer, json_object *filter, std::set &exclude_attributes, std::vector const &unidecode_data); json_object *parse_filter(const char *s); json_object *read_filter(const char *fname); -bool evaluate(mvt_feature const &feat, mvt_layer const &layer, json_object *filter, std::set &exclude_attributes, int z); +bool evaluate(mvt_feature const &feat, mvt_layer const &layer, json_object *filter, std::set 
&exclude_attributes, int z, std::vector const &unidecode_data); #endif diff --git a/geometry.hpp b/geometry.hpp index ea5081e16..850f7e798 100644 --- a/geometry.hpp +++ b/geometry.hpp @@ -104,12 +104,14 @@ std::string overzoom(const mvt_tile &tile, int oz, int ox, int oy, int nz, int n int detail, int buffer, std::set const &keep, bool do_compress, std::vector> *next_overzoomed_tiles, bool demultiply, json_object *filter, bool preserve_input_order, - std::unordered_map const &attribute_accum); + std::unordered_map const &attribute_accum, + std::vector const &unidecode_data); std::string overzoom(const std::string &s, int oz, int ox, int oy, int nz, int nx, int ny, int detail, int buffer, std::set const &keep, bool do_compress, std::vector> *next_overzoomed_tiles, bool demultiply, json_object *filter, bool preserve_input_order, - std::unordered_map const &attribute_accum); + std::unordered_map const &attribute_accum, + std::vector const &unidecode_data); #endif diff --git a/jsonpull/jsonpull.c b/jsonpull/jsonpull.c index 4c6a33fb1..04ca0f296 100644 --- a/jsonpull/jsonpull.c +++ b/jsonpull/jsonpull.c @@ -755,6 +755,7 @@ json_object *json_read_separators(json_pull *j, json_separator_callback cb, void json_object *s = add_object(j, JSON_STRING); if (s != NULL) { s->value.string.string = val.buf; + s->value.string.refcon = NULL; } else { string_free(&val); } diff --git a/jsonpull/jsonpull.h b/jsonpull/jsonpull.h index a45773420..b19e0e8e9 100644 --- a/jsonpull/jsonpull.h +++ b/jsonpull/jsonpull.h @@ -38,6 +38,7 @@ typedef struct json_object { struct { char *string; + void *refcon; // reference constant for caller's use } string; struct { diff --git a/main.cpp b/main.cpp index 3c5eeb9a6..152121840 100644 --- a/main.cpp +++ b/main.cpp @@ -96,6 +96,7 @@ std::map set_attributes; unsigned long long preserve_point_density_threshold = 0; long long extend_zooms_max = 0; int retain_points_multiplier = 1; +std::vector unidecode_data; std::vector order_by; bool order_reverse; @@ -2759,7 +2760,7 @@ std::pair read_input(std::vector &sources, char *fname, i std::atomic midx(0); std::atomic midy(0); std::vector strategies; - int written = traverse_zooms(fd, size, stringpool, &midx, &midy, maxzoom, minzoom, outdb, outdir, buffer, fname, tmpdir, gamma, full_detail, low_detail, min_detail, pool_off, initial_x, initial_y, simplification, maxzoom_simplification, layermaps, prefilter, postfilter, attribute_accum, filter, strategies, iz, shared_nodes_map, nodepos, basezoom, droprate); + int written = traverse_zooms(fd, size, stringpool, &midx, &midy, maxzoom, minzoom, outdb, outdir, buffer, fname, tmpdir, gamma, full_detail, low_detail, min_detail, pool_off, initial_x, initial_y, simplification, maxzoom_simplification, layermaps, prefilter, postfilter, attribute_accum, filter, strategies, iz, shared_nodes_map, nodepos, basezoom, droprate, unidecode_data); if (maxzoom != written) { if (written > minzoom) { @@ -3074,6 +3075,7 @@ int main(int argc, char **argv) { {"Filtering features by attributes", 0, 0, 0}, {"feature-filter-file", required_argument, 0, 'J'}, {"feature-filter", required_argument, 0, 'j'}, + {"unidecode-data", required_argument, 0, '~'}, {"Dropping a fixed fraction of features by zoom level", 0, 0, 0}, {"drop-rate", required_argument, 0, 'r'}, @@ -3300,6 +3302,8 @@ int main(int argc, char **argv) { extend_zooms_max = atoll_require(optarg, "Maximum number by which to extend zooms"); } else if (strcmp(opt, "retain-points-multiplier") == 0) { retain_points_multiplier = atoll_require(optarg, "Multiply the 
fraction of points retained by zoom level"); + } else if (strcmp(opt, "unidecode-data") == 0) { + unidecode_data = read_unidecode(optarg); } else { fprintf(stderr, "%s: Unrecognized option --%s\n", argv[0], opt); exit(EXIT_ARGS); diff --git a/overzoom.cpp b/overzoom.cpp index f1033d022..67037f918 100644 --- a/overzoom.cpp +++ b/overzoom.cpp @@ -8,6 +8,7 @@ #include "geometry.hpp" #include "evaluator.hpp" #include "attribute.hpp" +#include "text.hpp" extern char *optarg; extern int optind; @@ -18,6 +19,7 @@ bool demultiply = false; std::string filter; bool preserve_input_order = false; std::unordered_map attribute_accum; +std::vector unidecode_data; std::set keep; @@ -40,6 +42,7 @@ int main(int argc, char **argv) { {"feature-filter", required_argument, 0, 'j'}, {"preserve-input-order", no_argument, 0, 'o' & 0x1F}, {"accumulate-attribute", required_argument, 0, 'E'}, + {"unidecode-data", required_argument, 0, 'u' & 0x1F}, {0, 0, 0, 0}, }; @@ -90,6 +93,10 @@ int main(int argc, char **argv) { set_attribute_accum(attribute_accum, optarg, argv); break; + case 'u' & 0x1F: + unidecode_data = read_unidecode(optarg); + break; + default: fprintf(stderr, "Unrecognized flag -%c\n", i); usage(argv); @@ -144,7 +151,7 @@ int main(int argc, char **argv) { json_filter = parse_filter(filter.c_str()); } - std::string out = overzoom(tile, oz, ox, oy, nz, nx, ny, detail, buffer, keep, true, NULL, demultiply, json_filter, preserve_input_order, attribute_accum); + std::string out = overzoom(tile, oz, ox, oy, nz, nx, ny, detail, buffer, keep, true, NULL, demultiply, json_filter, preserve_input_order, attribute_accum, unidecode_data); fwrite(out.c_str(), sizeof(char), out.size(), f); fclose(f); diff --git a/text.cpp b/text.cpp index 83cbac151..88d18979a 100644 --- a/text.cpp +++ b/text.cpp @@ -1,8 +1,10 @@ -#include "text.hpp" #include #include #include #include +#include +#include +#include "text.hpp" #include "milo/dtoa_milo.h" #include "milo/milo.h" #include "errors.hpp" @@ -185,6 +187,76 @@ char *dtoa_milo(double val) { return dup; } +// to work with data from https://github.com/kmike/text-unidecode +std::vector read_unidecode(const char *fname) { + std::string data; + + FILE *f = fopen(fname, "rb"); + if (f == NULL) { + perror(fname); + exit(EXIT_OPEN); + } + + std::string buf; + buf.resize(2000); + + while (true) { + size_t nread = fread((void *) buf.c_str(), sizeof(char), buf.size(), f); + if (nread == 0) { + break; + } + data.append(buf.c_str(), nread); + } + + fclose(f); + + std::vector out; + out.emplace_back(); // because the data file is 1-indexed + out.emplace_back(); // ascii 001 + + for (size_t i = 0; i < data.size(); i++) { + if (data[i] == '\0') { + out.emplace_back(); + } else { + if (data[i] >= '\0' && data[i] <= '~') { + data[i] = tolower(data[i]); + } + out.back().push_back(data[i]); + } + } + + return out; +} + +std::string unidecode_smash(std::vector const &unidecode_data, const char *s) { + if (unidecode_data.size() == 0) { + return s; + } + + std::string out; + out.reserve(strlen(s)); + + long c; + while (true) { + const char *os = s; + s = utf8_next(s, &c); + if (s == NULL) { + break; + } + + if (c >= 0 && c < (long) unidecode_data.size()) { + out.append(unidecode_data[c]); + } else { + // pass through anything that is out of unidecode range literally + for (; os != s; os++) { + out.push_back(*os); + } + } + } + + return out; +} + unsigned long long fnv1a(std::string const &s) { // Store tiles by a hash of their contents (fnv1a 64-bit) // http://www.isthe.com/chongo/tech/comp/fnv/ diff 
--git a/text.hpp b/text.hpp index a89386daa..7d97ff7d9 100644 --- a/text.hpp +++ b/text.hpp @@ -8,6 +8,8 @@ const char *utf8_next(const char *s, long *c); std::string truncate16(std::string const &s, size_t runes); int integer_zoom(std::string where, std::string text); std::string format_commandline(int argc, char **argv); +std::vector read_unidecode(const char *fname); +std::string unidecode_smash(std::vector const &unidecode_data, const char *s); unsigned long long fnv1a(std::string const &s); unsigned long long fnv1a(const char *s, char additional); unsigned long long fnv1a(size_t size, void *p); diff --git a/tile-join.cpp b/tile-join.cpp index f0636ed4d..c3e2b3fec 100644 --- a/tile-join.cpp +++ b/tile-join.cpp @@ -54,6 +54,7 @@ int maxzoom = 32; int minzoom = 0; std::map renames; bool exclude_all = false; +std::vector unidecode_data; bool want_overzoom = false; int buffer = 5; @@ -143,7 +144,7 @@ void append_tile(std::string message, int z, unsigned x, unsigned y, std::map exclude_attributes; - if (filter != NULL && !evaluate(feat, layer, filter, exclude_attributes, z)) { + if (filter != NULL && !evaluate(feat, layer, filter, exclude_attributes, z, unidecode_data)) { continue; } @@ -702,7 +703,7 @@ struct tileset_reader { } if (source.layers.size() != 0) { - std::string ret = overzoom(source, parent_tile.z, parent_tile.x, parent_tile.y, tile.z, tile.x, tile.y, -1, buffer, std::set(), false, &next_overzoomed_tiles, false, NULL, false, std::unordered_map()); + std::string ret = overzoom(source, parent_tile.z, parent_tile.x, parent_tile.y, tile.z, tile.x, tile.y, -1, buffer, std::set(), false, &next_overzoomed_tiles, false, NULL, false, std::unordered_map(), unidecode_data); return ret; } @@ -1234,6 +1235,7 @@ int main(int argc, char **argv) { {"tile-stats-attributes-limit", required_argument, 0, '~'}, {"tile-stats-sample-values-limit", required_argument, 0, '~'}, {"tile-stats-values-limit", required_argument, 0, '~'}, + {"unidecode-data", required_argument, 0, '~'}, {0, 0, 0, 0}, }; @@ -1408,6 +1410,8 @@ int main(int argc, char **argv) { max_tilestats_sample_values = atoi(optarg); } else if (strcmp(opt, "tile-stats-values-limit") == 0) { max_tilestats_values = atoi(optarg); + } else if (strcmp(opt, "unidecode-data") == 0) { + unidecode_data = read_unidecode(optarg); } else { fprintf(stderr, "%s: Unrecognized option --%s\n", argv[0], opt); exit(EXIT_ARGS); diff --git a/tile.cpp b/tile.cpp index fef8fad57..d8528bdbb 100644 --- a/tile.cpp +++ b/tile.cpp @@ -902,6 +902,7 @@ struct write_tile_args { int wrote_zoom = 0; size_t tiling_seg = 0; json_object *filter = NULL; + std::vector const *unidecode_data; std::atomic *dropped_count = NULL; atomic_strategy *strategy = NULL; int zoom = -1; @@ -1027,7 +1028,7 @@ struct multiplier_state { // This function is called repeatedly from write_tile() to retrieve the next feature // from the input stream. If the stream is at an end, it returns a feature with the // geometry type set to -2. 
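For readers following the new text.cpp/text.hpp helpers above, here is a minimal standalone sketch (not part of the patch) of how read_unidecode() and unidecode_smash() fit together. The data file name is hypothetical, and the exact transliterations depend on the text-unidecode table supplied with --unidecode-data.

// Illustrative sketch only: exercises the new helpers declared in text.hpp.
// The file name below is hypothetical; any copy of the text-unidecode data
// file should work.
#include <cstdio>
#include <string>
#include <vector>
#include "text.hpp"

int main() {
	// Load the transliteration table once, as tippecanoe, tile-join, and
	// tippecanoe-overzoom now do when --unidecode-data is given.
	std::vector<std::string> unidecode_data = read_unidecode("unidecode-data.bin");

	// Both strings should reduce to the same lowercase ASCII form ("lodz"
	// with the stock text-unidecode table), which is what makes the new
	// case- and diacritic-insensitive FSL comparisons possible.
	std::string a = unidecode_smash(unidecode_data, "Łódź");
	std::string b = unidecode_smash(unidecode_data, "LODZ");
	printf("%s %s -> %s\n", a.c_str(), b.c_str(), a == b ? "equal" : "different");

	// With an empty table, unidecode_smash() is a pass-through of its input.
	std::string untouched = unidecode_smash(std::vector<std::string>(), "Łódź");
	printf("%s\n", untouched.c_str());
	return 0;
}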
-static serial_feature next_feature(decompressor *geoms, std::atomic *geompos_in, int z, unsigned tx, unsigned ty, unsigned *initial_x, unsigned *initial_y, long long *original_features, long long *unclipped_features, int nextzoom, int maxzoom, int minzoom, int max_zoom_increment, size_t pass, std::atomic *along, long long alongminus, int buffer, int *within, compressor **geomfile, std::atomic *geompos, std::atomic *oprogress, double todo, const char *fname, int child_shards, json_object *filter, const char *global_stringpool, long long *pool_off, std::vector> *layer_unmaps, bool first_time, bool compressed, multiplier_state *multiplier_state, std::shared_ptr &tile_stringpool) { +static serial_feature next_feature(decompressor *geoms, std::atomic *geompos_in, int z, unsigned tx, unsigned ty, unsigned *initial_x, unsigned *initial_y, long long *original_features, long long *unclipped_features, int nextzoom, int maxzoom, int minzoom, int max_zoom_increment, size_t pass, std::atomic *along, long long alongminus, int buffer, int *within, compressor **geomfile, std::atomic *geompos, std::atomic *oprogress, double todo, const char *fname, int child_shards, json_object *filter, const char *global_stringpool, long long *pool_off, std::vector> *layer_unmaps, bool first_time, bool compressed, multiplier_state *multiplier_state, std::shared_ptr &tile_stringpool, std::vector const &unidecode_data) { while (1) { serial_feature sf; long long len; @@ -1148,7 +1149,7 @@ static serial_feature next_feature(decompressor *geoms, std::atomic * attributes.insert(std::pair("$zoom", v2)); - if (!evaluate(attributes, layername, filter, exclude_attributes)) { + if (!evaluate(attributes, layername, filter, exclude_attributes, unidecode_data)) { continue; } @@ -1236,6 +1237,7 @@ struct run_prefilter_args { long long *pool_off = NULL; FILE *prefilter_fp = NULL; json_object *filter = NULL; + std::vector const *unidecode_data; bool first_time = false; bool compressed = false; }; @@ -1247,7 +1249,7 @@ void *run_prefilter(void *v) { std::shared_ptr tile_stringpool = std::make_shared(); while (1) { - serial_feature sf = next_feature(rpa->geoms, rpa->geompos_in, rpa->z, rpa->tx, rpa->ty, rpa->initial_x, rpa->initial_y, rpa->original_features, rpa->unclipped_features, rpa->nextzoom, rpa->maxzoom, rpa->minzoom, rpa->max_zoom_increment, rpa->pass, rpa->along, rpa->alongminus, rpa->buffer, rpa->within, rpa->geomfile, rpa->geompos, rpa->oprogress, rpa->todo, rpa->fname, rpa->child_shards, rpa->filter, rpa->global_stringpool, rpa->pool_off, rpa->layer_unmaps, rpa->first_time, rpa->compressed, &multiplier_state, tile_stringpool); + serial_feature sf = next_feature(rpa->geoms, rpa->geompos_in, rpa->z, rpa->tx, rpa->ty, rpa->initial_x, rpa->initial_y, rpa->original_features, rpa->unclipped_features, rpa->nextzoom, rpa->maxzoom, rpa->minzoom, rpa->max_zoom_increment, rpa->pass, rpa->along, rpa->alongminus, rpa->buffer, rpa->within, rpa->geomfile, rpa->geompos, rpa->oprogress, rpa->todo, rpa->fname, rpa->child_shards, rpa->filter, rpa->global_stringpool, rpa->pool_off, rpa->layer_unmaps, rpa->first_time, rpa->compressed, &multiplier_state, tile_stringpool, *(rpa->unidecode_data)); if (sf.t < 0) { break; } @@ -1465,7 +1467,7 @@ return; } } -long long write_tile(decompressor *geoms, std::atomic *geompos_in, char *global_stringpool, int z, const unsigned tx, const unsigned ty, const int detail, int min_detail, sqlite3 *outdb, const char *outdir, int buffer, const char *fname, compressor **geomfile, int minzoom, int maxzoom, double todo, 
std::atomic *along, long long alongminus, double gamma, int child_shards, long long *pool_off, unsigned *initial_x, unsigned *initial_y, std::atomic *running, double simplification, std::vector> *layermaps, std::vector> *layer_unmaps, size_t tiling_seg, size_t pass, unsigned long long mingap, long long minextent, unsigned long long mindrop_sequence, const char *prefilter, const char *postfilter, json_object *filter, write_tile_args *arg, atomic_strategy *strategy, bool compressed_input, node *shared_nodes_map, size_t nodepos) { +long long write_tile(decompressor *geoms, std::atomic *geompos_in, char *global_stringpool, int z, const unsigned tx, const unsigned ty, const int detail, int min_detail, sqlite3 *outdb, const char *outdir, int buffer, const char *fname, compressor **geomfile, int minzoom, int maxzoom, double todo, std::atomic *along, long long alongminus, double gamma, int child_shards, long long *pool_off, unsigned *initial_x, unsigned *initial_y, std::atomic *running, double simplification, std::vector> *layermaps, std::vector> *layer_unmaps, size_t tiling_seg, size_t pass, unsigned long long mingap, long long minextent, unsigned long long mindrop_sequence, const char *prefilter, const char *postfilter, json_object *filter, write_tile_args *arg, atomic_strategy *strategy, bool compressed_input, node *shared_nodes_map, size_t nodepos, std::vector const &unidecode_data) { double merge_fraction = 1; double mingap_fraction = 1; double minextent_fraction = 1; @@ -1623,6 +1625,7 @@ long long write_tile(decompressor *geoms, std::atomic *geompos_in, ch rpa.global_stringpool = global_stringpool; rpa.pool_off = pool_off; rpa.filter = filter; + rpa.unidecode_data = &unidecode_data; rpa.first_time = first_time; rpa.compressed = compressed_input; @@ -1651,7 +1654,7 @@ long long write_tile(decompressor *geoms, std::atomic *geompos_in, ch ssize_t which_serial_feature = -1; if (prefilter == NULL) { - sf = next_feature(geoms, geompos_in, z, tx, ty, initial_x, initial_y, &original_features, &unclipped_features, nextzoom, maxzoom, minzoom, max_zoom_increment, pass, along, alongminus, buffer, within, geomfile, geompos, &oprogress, todo, fname, child_shards, filter, global_stringpool, pool_off, layer_unmaps, first_time, compressed_input, &multiplier_state, tile_stringpool); + sf = next_feature(geoms, geompos_in, z, tx, ty, initial_x, initial_y, &original_features, &unclipped_features, nextzoom, maxzoom, minzoom, max_zoom_increment, pass, along, alongminus, buffer, within, geomfile, geompos, &oprogress, todo, fname, child_shards, filter, global_stringpool, pool_off, layer_unmaps, first_time, compressed_input, &multiplier_state, tile_stringpool, unidecode_data); } else { sf = parse_feature(prefilter_jp, z, tx, ty, layermaps, tiling_seg, layer_unmaps, postfilter != NULL); } @@ -2575,7 +2578,7 @@ exit(EXIT_IMPOSSIBLE); // fprintf(stderr, "%d/%u/%u\n", z, x, y); - long long len = write_tile(&dc, &geompos, arg->global_stringpool, z, x, y, z == arg->maxzoom ? 
arg->full_detail : arg->low_detail, arg->min_detail, arg->outdb, arg->outdir, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, arg->along, geompos, arg->gamma, arg->child_shards, arg->pool_off, arg->initial_x, arg->initial_y, arg->running, arg->simplification, arg->layermaps, arg->layer_unmaps, arg->tiling_seg, arg->pass, arg->mingap, arg->minextent, arg->mindrop_sequence, arg->prefilter, arg->postfilter, arg->filter, arg, arg->strategy, arg->compressed, arg->shared_nodes_map, arg->nodepos); + long long len = write_tile(&dc, &geompos, arg->global_stringpool, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->outdb, arg->outdir, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, arg->along, geompos, arg->gamma, arg->child_shards, arg->pool_off, arg->initial_x, arg->initial_y, arg->running, arg->simplification, arg->layermaps, arg->layer_unmaps, arg->tiling_seg, arg->pass, arg->mingap, arg->minextent, arg->mindrop_sequence, arg->prefilter, arg->postfilter, arg->filter, arg, arg->strategy, arg->compressed, arg->shared_nodes_map, arg->nodepos, (*arg->unidecode_data)); if (pthread_mutex_lock(&var_lock) != 0) { perror("pthread_mutex_lock"); @@ -2640,7 +2643,7 @@ exit(EXIT_IMPOSSIBLE); return err_or_null; } -int traverse_zooms(int *geomfd, off_t *geom_size, char *global_stringpool, std::atomic *midx, std::atomic *midy, int &maxzoom, int minzoom, sqlite3 *outdb, const char *outdir, int buffer, const char *fname, const char *tmpdir, double gamma, int full_detail, int low_detail, int min_detail, long long *pool_off, unsigned *initial_x, unsigned *initial_y, double simplification, double maxzoom_simplification, std::vector> &layermaps, const char *prefilter, const char *postfilter, std::unordered_map const *attribute_accum, json_object *filter, std::vector &strategies, int iz, node *shared_nodes_map, size_t nodepos, int basezoom, double droprate) { +int traverse_zooms(int *geomfd, off_t *geom_size, char *global_stringpool, std::atomic *midx, std::atomic *midy, int &maxzoom, int minzoom, sqlite3 *outdb, const char *outdir, int buffer, const char *fname, const char *tmpdir, double gamma, int full_detail, int low_detail, int min_detail, long long *pool_off, unsigned *initial_x, unsigned *initial_y, double simplification, double maxzoom_simplification, std::vector> &layermaps, const char *prefilter, const char *postfilter, std::unordered_map const *attribute_accum, json_object *filter, std::vector &strategies, int iz, node *shared_nodes_map, size_t nodepos, int basezoom, double droprate, std::vector const &unidecode_data) { last_progress = 0; // The existing layermaps are one table per input thread. 
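Stepping back from the tile-writing plumbing for a moment: the reason evaluator.cpp can safely rewrite the filter's JSON parse tree in place is the new refcon field added to json_object earlier in this patch. smash() rewrites a literal once and then tags it with the address of the unidecode table, so evaluating the same filter against millions of features does not re-transliterate the same constant string. A minimal sketch of that caching idea follows, with hypothetical names (the real code is smash() in evaluator.cpp):

// Sketch only, hypothetical names: the "tag the string with the table that
// produced it" memoization used by smash() via json_object.value.string.refcon.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>

struct cached_literal {
	char *string;
	void *refcon = nullptr;	// stays NULL until the string has been rewritten
};

template <typename Transform>
void rewrite_once(cached_literal &lit, const void *table_key, Transform transform) {
	if (lit.refcon == table_key) {
		return;	// already rewritten against this table; skip the work
	}
	std::string replacement = transform(lit.string);
	char *p = (char *) realloc(lit.string, replacement.size() + 1);
	if (p == NULL) {
		perror("realloc");
		exit(EXIT_FAILURE);
	}
	strcpy(p, replacement.c_str());
	lit.string = p;
	lit.refcon = (void *) table_key;	// mark as done, like smash() does
}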
@@ -2838,6 +2841,7 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *global_stringpool, std:: args[thread].postfilter = postfilter; args[thread].attribute_accum = attribute_accum; args[thread].filter = filter; + args[thread].unidecode_data = &unidecode_data; args[thread].tasks = dispatches[thread].tasks; args[thread].running = &running; diff --git a/tile.hpp b/tile.hpp index ee4cd4d01..f679f85d9 100644 --- a/tile.hpp +++ b/tile.hpp @@ -55,7 +55,7 @@ struct strategy { // long long write_tile(char **geom, char *stringpool, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, sqlite3 *outdb, const char *outdir, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, std::atomic *strategy); -int traverse_zooms(int *geomfd, off_t *geom_size, char *stringpool, std::atomic *midx, std::atomic *midy, int &maxzoom, int minzoom, sqlite3 *outdb, const char *outdir, int buffer, const char *fname, const char *tmpdir, double gamma, int full_detail, int low_detail, int min_detail, long long *pool_off, unsigned *initial_x, unsigned *initial_y, double simplification, double maxzoom_simplification, std::vector > &layermap, const char *prefilter, const char *postfilter, std::unordered_map const *attribute_accum, struct json_object *filter, std::vector &strategies, int iz, struct node *shared_nodes_map, size_t nodepos, int basezoom, double droprate); +int traverse_zooms(int *geomfd, off_t *geom_size, char *stringpool, std::atomic *midx, std::atomic *midy, int &maxzoom, int minzoom, sqlite3 *outdb, const char *outdir, int buffer, const char *fname, const char *tmpdir, double gamma, int full_detail, int low_detail, int min_detail, long long *pool_off, unsigned *initial_x, unsigned *initial_y, double simplification, double maxzoom_simplification, std::vector > &layermap, const char *prefilter, const char *postfilter, std::unordered_map const *attribute_accum, struct json_object *filter, std::vector &strategies, int iz, struct node *shared_nodes_map, size_t nodepos, int basezoom, double droprate, std::vector const &unidecode_data); int manage_gap(unsigned long long index, unsigned long long *previndex, double scale, double gamma, double *gap); diff --git a/version.hpp b/version.hpp index b99ce27fe..10e762081 100644 --- a/version.hpp +++ b/version.hpp @@ -1,6 +1,6 @@ #ifndef VERSION_HPP #define VERSION_HPP -#define VERSION "v2.43.0" +#define VERSION "v2.44.0" #endif
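Finally, a hedged sketch of how the pieces fit together from a caller's point of view. The filter string below is only an illustration of FSL syntax (attribute, operator, literal), not taken from the patch, and the data file path is hypothetical; the point is that once the table is loaded, evaluate() smashes both the attribute value and the string literal before comparing, so "LODZ", "Lodz", and "Łódź" can all satisfy the same filter.

// Illustrative sketch, not part of the patch: parse an FSL filter and load
// unidecode data the way tippecanoe-overzoom does with --unidecode-data.
#include <string>
#include <vector>
#include "evaluator.hpp"
#include "text.hpp"

int main() {
	// Hypothetical data file path; the format is the text-unidecode table.
	std::vector<std::string> unidecode_data = read_unidecode("unidecode-data.bin");

	// "cn" is the FSL "contains" operator; with unidecode data loaded,
	// the comparison becomes case- and diacritic-insensitive.
	json_object *filter = parse_filter("{\"*\": [\"name\", \"cn\", \"Łódź\"]}");

	// From here a caller would invoke one of the evaluate() overloads once
	// per feature, e.g. evaluate(feature_attributes, layer_name, filter,
	// exclude_attributes, unidecode_data); features whose "name" contains
	// "lodz" in any casing or accentuation now pass.
	(void) filter;
	return 0;
}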