Make feature ordering cooperate with --retain-points-multiplier (#188)

* Make feature ordering cooperate with --retain-points-multiplier
* Forgot to check in the actual code changes???
* Sort within each multiplier cluster as well as between clusters
* Correct description of behavior in changelog
* Drag original feature sequence along in megatiles for post-filter sort
* Plumb the preserve-input-order flag through overzoom
* Sort in overzoom if requested
* Use within-tile input sequence numbers, not global sequence numbers
* Documentation
* Reverse direction of search to prevent accidental skipping
* Add some comments about converting between attribute representations
felt · Jan 21, 2024 · 679a0d6 · 679a0d6
1 parent 5d92a17
commit 679a0d6
Show file tree

Hide file tree

Showing 23 changed files with 41,781 additions and 41,134 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,7 @@
+# 2.41.1
+
+* Make --preserve-input-order, --order-by, --order-descending-by, --order-smallest-first, and --order-largest-first cooperate with --retain-points-multiplier. The clusters will be ordered by their lead feature in the specified sequence. The other features in each cluster will continue to be physically near the lead feature, but ordered as specified within the cluster.
+
 # 2.41.0
 
 * Add Felt-style expression support for -j feature filters

diff --git a/Makefile b/Makefile
@@ -310,6 +310,12 @@ overzoom-test: tippecanoe-overzoom
 	./tippecanoe-decode tests/pbf/0-0-0-filter-mult.pbf 0 0 0 > tests/pbf/0-0-0-filter-mult.pbf.json.check
 	cmp tests/pbf/0-0-0-filter-mult.pbf.json.check tests/pbf/0-0-0-filter-mult.pbf.json
 	rm tests/pbf/0-0-0-filter-mult.pbf tests/pbf/0-0-0-filter-mult.pbf.json.check
+	# Filtering with multiplier and preserve-input-order
+	# 243 features in the source tile tests/pbf/0-0-0-pop.pbf
+	./tippecanoe-overzoom -y NAME -y SCALERANK -j'{"*":["NAME","cn","e"]}' -m --preserve-input-order -o tests/pbf/0-0-0-filter-mult-order.pbf tests/pbf/0-0-0-pop.pbf 0/0/0 0/0/0
+	./tippecanoe-decode tests/pbf/0-0-0-filter-mult-order.pbf 0 0 0 > tests/pbf/0-0-0-filter-mult-order.pbf.json.check
+	cmp tests/pbf/0-0-0-filter-mult-order.pbf.json.check tests/pbf/0-0-0-filter-mult-order.pbf.json
+	rm tests/pbf/0-0-0-filter-mult-order.pbf tests/pbf/0-0-0-filter-mult-order.pbf.json.check
 	# Test that overzooming with a multiplier exactly reverses the effect of tiling with a multiplier
 	./tippecanoe -q -z5 --preserve-point-density-threshold 8 --retain-points-multiplier 3 -f -e tests/muni/out/out.dir tests/muni/muni.json
 	./tippecanoe -q -z5 --preserve-point-density-threshold 8 -f -o tests/muni/out/out.mbtiles tests/muni/muni.json

diff --git a/README.md b/README.md
@@ -996,4 +996,4 @@ reads tile `inz/inx/iny` of `in.mvt.gz` and produces tile `outz/outx/outy` of `o
  * `-y` *attribute*: Retain the specified *attribute* in the output features. All attributes that are not named in a `-y` option will be removed.
  * `-j` *filter*: Filter features using the same expression syntax as in tippecanoe.
  * `-m`: If a tile was created with the `--retain-points-multiplier` option, thin the tile back down to its normal feature count during overzooming. The first feature from each cluster will be retained, unless `-j` is used to specify a filter, in which case the first matching feature from each cluster will be retained instead.
-
+ * `--preserve-input-order`: Restore a set of filtered features to its original input order.
diff --git a/clip.cpp b/clip.cpp
@@ -8,6 +8,7 @@
 #include "compression.hpp"
 #include "mvt.hpp"
 #include "evaluator.hpp"
+#include "serial.hpp"
 
 static std::vector<std::pair<double, double>> clip_poly1(std::vector<std::pair<double, double>> &geom,
 							 long long minx, long long miny, long long maxx, long long maxy,
@@ -756,7 +757,7 @@ static std::vector<std::pair<double, double>> clip_poly1(std::vector<std::pair<d
 std::string overzoom(std::string s, int oz, int ox, int oy, int nz, int nx, int ny,
 		     int detail, int buffer, std::set<std::string> const &keep, bool do_compress,
 		     std::vector<std::pair<unsigned, unsigned>> *next_overzoomed_tiles,
-		     bool demultiply, json_object *filter) {
+		     bool demultiply, json_object *filter, bool preserve_input_order) {
 	mvt_tile tile;
 
 	try {
@@ -770,7 +771,7 @@ std::string overzoom(std::string s, int oz, int ox, int oy, int nz, int nx, int
 		exit(EXIT_PROTOBUF);
 	}
 
-	return overzoom(tile, oz, ox, oy, nz, nx, ny, detail, buffer, keep, do_compress, next_overzoomed_tiles, demultiply, filter);
+	return overzoom(tile, oz, ox, oy, nz, nx, ny, detail, buffer, keep, do_compress, next_overzoomed_tiles, demultiply, filter, preserve_input_order);
 }
 
 struct tile_feature {
@@ -780,6 +781,7 @@ struct tile_feature {
 	unsigned long long id;
 	std::vector<unsigned> tags;
 	mvt_layer const *layer;
+	size_t seq = 0;
 };
 
 void feature_out(tile_feature const &feature, mvt_layer &outlayer, std::set<std::string> const &keep) {
@@ -799,6 +801,8 @@ void feature_out(tile_feature const &feature, mvt_layer &outlayer, std::set<std:
 			outfeature.id = feature.id;
 		}
 
+		outfeature.seq = feature.seq;
+
 		for (size_t i = 0; i + 1 < feature.tags.size(); i += 2) {
 			if (keep.size() == 0 || keep.find(feature.layer->keys[feature.tags[i]]) != keep.end()) {
 				outlayer.tag(outfeature, feature.layer->keys[feature.tags[i]], feature.layer->values[feature.tags[i + 1]]);
@@ -809,10 +813,16 @@ void feature_out(tile_feature const &feature, mvt_layer &outlayer, std::set<std:
 	}
 }
 
+static struct preservecmp {
+	bool operator()(const mvt_feature &a, const mvt_feature &b) {
+		return a.seq < b.seq;
+	}
+} preservecmp;
+
 std::string overzoom(mvt_tile tile, int oz, int ox, int oy, int nz, int nx, int ny,
 		     int detail, int buffer, std::set<std::string> const &keep, bool do_compress,
 		     std::vector<std::pair<unsigned, unsigned>> *next_overzoomed_tiles,
-		     bool demultiply, json_object *filter) {
+		     bool demultiply, json_object *filter, bool preserve_input_order) {
 	mvt_tile outtile;
 
 	for (auto const &layer : tile.layers) {
@@ -832,15 +842,20 @@ std::string overzoom(mvt_tile tile, int oz, int ox, int oy, int nz, int nx, int
 		for (auto feature : layer.features) {
 			bool flush_multiplier_cluster = false;
 			if (demultiply) {
-				for (size_t i = 0; i + 1 < feature.tags.size(); i += 2) {
+				for (ssize_t i = feature.tags.size() - 2; i >= 0; i -= 2) {
 					if (layer.keys[feature.tags[i]] == "tippecanoe:retain_points_multiplier_first") {
 						mvt_value v = layer.values[feature.tags[i + 1]];
 						if (v.type == mvt_bool && v.numeric_value.bool_value) {
 							flush_multiplier_cluster = true;
 							feature.tags.erase(feature.tags.begin() + i, feature.tags.begin() + i + 2);
-							break;
 						}
 					}
+
+					if (layer.keys[feature.tags[i]] == "tippecanoe:retain_points_multiplier_sequence") {
+						mvt_value v = layer.values[feature.tags[i + 1]];
+						feature.seq = atoll(mvt_value_to_serial_val(v).s.c_str());
+						feature.tags.erase(feature.tags.begin() + i, feature.tags.begin() + i + 2);
+					}
 				}
 			} else {
 				flush_multiplier_cluster = true;
@@ -937,6 +952,7 @@ std::string overzoom(mvt_tile tile, int oz, int ox, int oy, int nz, int nx, int
 			tf.id = feature.id;
 			tf.tags = feature.tags;
 			tf.layer = &layer;
+			tf.seq = feature.seq;
 
 			pending_tile_features.push_back(tf);
 		}
@@ -946,6 +962,10 @@ std::string overzoom(mvt_tile tile, int oz, int ox, int oy, int nz, int nx, int
 			pending_tile_features.clear();
 		}
 
+		if (preserve_input_order) {
+			std::sort(outlayer.features.begin(), outlayer.features.end(), preservecmp);
+		}
+
 		if (outlayer.features.size() > 0) {
 			outtile.layers.push_back(outlayer);
 		}
@@ -965,7 +985,7 @@ std::string overzoom(mvt_tile tile, int oz, int ox, int oy, int nz, int nx, int
 					std::string child = overzoom(outtile, nz, nx, ny,
 								     nz + 1, nx * 2 + x, ny * 2 + y,
 								     detail, buffer, keep, false, NULL,
-								     demultiply, filter);
+								     demultiply, filter, preserve_input_order);
 					if (child.size() > 0) {
 						next_overzoomed_tiles->emplace_back(nx * 2 + x, ny * 2 + y);
 					}

diff --git a/geometry.hpp b/geometry.hpp
@@ -102,11 +102,11 @@ double distance_from_line(long long point_x, long long point_y, long long segA_x
 std::string overzoom(mvt_tile tile, int oz, int ox, int oy, int nz, int nx, int ny,
 		     int detail, int buffer, std::set<std::string> const &keep, bool do_compress,
 		     std::vector<std::pair<unsigned, unsigned>> *next_overzoomed_tiles,
-		     bool demultiply, json_object *filter);
+		     bool demultiply, json_object *filter, bool preserve_input_order);
 
 std::string overzoom(std::string s, int oz, int ox, int oy, int nz, int nx, int ny,
 		     int detail, int buffer, std::set<std::string> const &keep, bool do_compress,
 		     std::vector<std::pair<unsigned, unsigned>> *next_overzoomed_tiles,
-		     bool demultiply, json_object *filter);
+		     bool demultiply, json_object *filter, bool preserve_input_order);
 
 #endif
diff --git a/man/tippecanoe.1 b/man/tippecanoe.1
@@ -1266,4 +1266,6 @@ reads tile \fB\fCinz/inx/iny\fR of \fB\fCin.mvt.gz\fR and produces tile \fB\fCou
 \fB\fC\-j\fR \fIfilter\fP: Filter features using the same expression syntax as in tippecanoe.
 .IP \(bu 2
 \fB\fC\-m\fR: If a tile was created with the \fB\fC\-\-retain\-points\-multiplier\fR option, thin the tile back down to its normal feature count during overzooming. The first feature from each cluster will be retained, unless \fB\fC\-j\fR is used to specify a filter, in which case the first matching feature from each cluster will be retained instead.
+.IP \(bu 2
+\fB\fC\-\-preserve\-input\-order\fR: Restore a set of filtered features to its original input order.
 .RE
diff --git a/mvt.cpp b/mvt.cpp
@@ -14,6 +14,7 @@
 #include "protozero/pbf_writer.hpp"
 #include "milo/dtoa_milo.h"
 #include "errors.hpp"
+#include "serial.hpp"
 
 mvt_geometry::mvt_geometry(int nop, long long nx, long long ny) {
 	this->op = nop;
@@ -620,6 +621,11 @@ bool is_unsigned_integer(const char *s, unsigned long long *v) {
 	return 1;
 }
 
+// This converts a serial_val-style attribute value to an mvt_value
+// to store in a tile. If the value is numeric, it tries to choose
+// the type (int, uint, sint, float, or double) that will give the
+// smallest representation in the tile without losing precision,
+// regardless of how the value was represented in the original source.
 mvt_value stringified_to_mvt_value(int type, const char *s) {
 	mvt_value tv;
 
@@ -675,3 +681,43 @@ mvt_value stringified_to_mvt_value(int type, const char *s) {
 
 	return tv;
 }
+
+// This converts an mvt_value attribute value from a tile back into
+// a serial_val for more convenient parsing and comparison without
+// having to handle all of the vector tile numeric types separately.
+// All numeric types are given the type mvt_double in the serial_val
+// whether the actual value is integer or floating point.
+serial_val mvt_value_to_serial_val(mvt_value const &v) {
+	serial_val sv;
+
+	if (v.type == mvt_string) {
+		sv.type = mvt_string;
+		sv.s = v.string_value;
+	} else if (v.type == mvt_float) {
+		sv.type = mvt_double;
+		sv.s = milo::dtoa_milo(v.numeric_value.float_value);
+	} else if (v.type == mvt_double) {
+		sv.type = mvt_double;
+		sv.s = milo::dtoa_milo(v.numeric_value.double_value);
+	} else if (v.type == mvt_int) {
+		sv.type = mvt_double;
+		sv.s = std::to_string(v.numeric_value.int_value);
+	} else if (v.type == mvt_uint) {
+		sv.type = mvt_double;
+		sv.s = std::to_string(v.numeric_value.uint_value);
+	} else if (v.type == mvt_sint) {
+		sv.type = mvt_double;
+		sv.s = std::to_string(v.numeric_value.sint_value);
+	} else if (v.type == mvt_bool) {
+		sv.type = mvt_bool;
+		sv.s = v.numeric_value.bool_value ? "true" : "false";
+	} else if (v.type == mvt_null) {
+		sv.type = mvt_null;
+		sv.s = "null";
+	} else {
+		fprintf(stderr, "unhandled mvt_type %d\n", v.type);
+		exit(EXIT_IMPOSSIBLE);
+	}
+
+	return sv;
+}
diff --git a/mvt.hpp b/mvt.hpp
@@ -49,6 +49,7 @@ struct mvt_feature {
 	unsigned long long id = 0;
 	bool has_id = false;
 	bool dropped = false;
+	size_t seq = 0;	 // used for ordering in overzoom
 
 	mvt_feature() {
 		has_id = false;
@@ -122,4 +123,7 @@ mvt_value stringified_to_mvt_value(int type, const char *s);
 
 bool is_integer(const char *s, long long *v);
 bool is_unsigned_integer(const char *s, unsigned long long *v);
+
+struct serial_val;
+serial_val mvt_value_to_serial_val(mvt_value const &v);
 #endif
diff --git a/overzoom.cpp b/overzoom.cpp
@@ -15,6 +15,7 @@ int detail = 12;  // tippecanoe-style: mvt extent == 1 << detail
 int buffer = 5;	  // tippecanoe-style: mvt buffer == extent * buffer / 256;
 bool demultiply = false;
 std::string filter;
+bool preserve_input_order = false;
 
 std::set<std::string> keep;
 
@@ -28,7 +29,31 @@ int main(int argc, char **argv) {
 	int i;
 	const char *outfile = NULL;
 
-	while ((i = getopt(argc, argv, "y:d:b:o:mj:")) != -1) {
+	struct option long_options[] = {
+		{"include", required_argument, 0, 'y'},
+		{"full-detail", required_argument, 0, 'd'},
+		{"buffer", required_argument, 0, 'b'},
+		{"output", required_argument, 0, 'o'},
+		{"filter-points-multiplier", no_argument, 0, 'm'},
+		{"feature-filter", required_argument, 0, 'j'},
+		{"preserve-input-order", no_argument, 0, 'o' & 0x1F},
+
+		{0, 0, 0, 0},
+	};
+
+	std::string getopt_str;
+	for (size_t lo = 0; long_options[lo].name != NULL; lo++) {
+		if (long_options[lo].val > ' ') {
+			getopt_str.push_back(long_options[lo].val);
+
+			if (long_options[lo].has_arg == required_argument) {
+				getopt_str.push_back(':');
+			}
+		}
+	}
+
+	int option_index = 0;
+	while ((i = getopt_long(argc, argv, getopt_str.c_str(), long_options, &option_index)) != -1) {
 		switch (i) {
 		case 'y':
 			keep.insert(optarg);
@@ -54,6 +79,10 @@ int main(int argc, char **argv) {
 			filter = optarg;
 			break;
 
+		case 'o' & 0x1F:
+			preserve_input_order = true;
+			break;
+
 		default:
 			fprintf(stderr, "Unrecognized flag -%c\n", i);
 			usage(argv);
@@ -108,7 +137,7 @@ int main(int argc, char **argv) {
 		json_filter = parse_filter(filter.c_str());
 	}
 
-	std::string out = overzoom(tile, oz, ox, oy, nz, nx, ny, detail, buffer, keep, true, NULL, demultiply, json_filter);
+	std::string out = overzoom(tile, oz, ox, oy, nz, nx, ny, detail, buffer, keep, true, NULL, demultiply, json_filter, preserve_input_order);
 	fwrite(out.c_str(), sizeof(char), out.size(), f);
 	fclose(f);
 

diff --git a/read_json.cpp b/read_json.cpp
@@ -116,6 +116,10 @@ void parse_geometry(int t, json_object *j, drawvec &out, int op, const char *fna
 	}
 }
 
+// This is used to convert a JSON attribute value into a serial_val-style
+// type and stringified value. All numeric values are recorded here as
+// mvt_double, even if they are integers -- including integers that are too
+// large to fit in a double, which are still stringified with their original precision.
 void stringify_value(json_object *value, int &type, std::string &stringified, const char *reading, int line, json_object *feature) {
 	if (value != NULL) {
 		int vt = value->type;

diff --git a/serial.cpp b/serial.cpp
@@ -187,7 +187,6 @@ std::string serialize_feature(serial_feature *sf, long long wx, long long wy) {
 #define FLAG_LAYER 7
 
 #define FLAG_LABEL_POINT 6
-#define FLAG_SEQ 5
 #define FLAG_INDEX 4
 #define FLAG_EXTENT 3
 #define FLAG_ID 2
@@ -197,17 +196,14 @@ std::string serialize_feature(serial_feature *sf, long long wx, long long wy) {
 	long long layer = 0;
 	layer |= sf->layer << FLAG_LAYER;
 	layer |= (sf->label_point != 0) << FLAG_LABEL_POINT;
-	layer |= (sf->seq != 0) << FLAG_SEQ;
 	layer |= (sf->index != 0) << FLAG_INDEX;
 	layer |= (sf->extent != 0) << FLAG_EXTENT;
 	layer |= sf->has_id << FLAG_ID;
 	layer |= sf->has_tippecanoe_minzoom << FLAG_MINZOOM;
 	layer |= sf->has_tippecanoe_maxzoom << FLAG_MAXZOOM;
 
 	serialize_long_long(s, layer);
-	if (sf->seq != 0) {
-		serialize_long_long(s, sf->seq);
-	}
+	serialize_long_long(s, sf->seq);
 	if (sf->has_tippecanoe_minzoom) {
 		serialize_int(s, sf->tippecanoe_minzoom);
 	}
@@ -252,9 +248,7 @@ serial_feature deserialize_feature(std::string &geoms, unsigned z, unsigned tx,
 	deserialize_long_long(&cp, &sf.layer);
 
 	sf.seq = 0;
-	if (sf.layer & (1 << FLAG_SEQ)) {
-		deserialize_long_long(&cp, &sf.seq);
-	}
+	deserialize_long_long(&cp, &sf.seq);
 
 	sf.tippecanoe_minzoom = -1;
 	sf.tippecanoe_maxzoom = -1;
@@ -683,10 +677,6 @@ int serialize_feature(struct serialization_state *sst, serial_feature &sf) {
 		*(sst->area_sum) += extent;
 	}
 
-	if (!prevent[P_INPUT_ORDER]) {
-		sf.seq = 0;
-	}
-
 	unsigned long long bbox_index;
 	long long midx, midy;
 

diff --git a/serial.hpp b/serial.hpp
@@ -31,6 +31,10 @@ void deserialize_ulong_long(char **f, unsigned long long *n);
 void deserialize_uint(char **f, unsigned *n);
 void deserialize_byte(char **f, signed char *n);
 
+// This is the main representation of attribute values in memory and
+// in the string pool. The type is one of the mvt_value types (mvt_string,
+// mvt_double, mvt_bool, or mvt_null). Note that all numeric values,
+// whether integer or floating point, use mvt_double here.
 struct serial_val {
 	int type = 0;
 	std::string s = "";