Skip to content

Commit

Permalink
be more robust in WKT parsing, in particular support arbitrary whitespace
Browse files Browse the repository at this point in the history
  • Loading branch information
patrickbr committed Sep 30, 2024
1 parent bb73b3e commit 191ac70
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 69 deletions.
86 changes: 57 additions & 29 deletions src/spatialjoin/WKTParse.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,14 @@ typedef std::vector<ParseJob> ParseBatch;
// _____________________________________________________________________________
inline util::geo::I32Line parseLineString(const char* c, const char** endr) {
util::geo::I32Line line;

c = strchr(c, '(');
if (!c) return line; // parse error
c++;

auto end = strchr(c, ')');
if (endr) (*endr) = end;
if (!end) return line;
if (!end) return line; // parse error

line.reserve((end - c) / 20);

Expand All @@ -33,8 +38,11 @@ inline util::geo::I32Line parseLineString(const char* c, const char** endr) {

const char* next = strchr(c, ' ');

if (!next || next >= end) return {};
double y = util::atof(next + 1, 10);
if (!next || next >= end) return {}; // parse error

while (*next && *next != ')' && isspace(*next)) next++;

double y = util::atof(next, 10);
auto projPoint = latLngToWebMerc(util::geo::DPoint(x, y));

line.push_back({projPoint.getX() * PREC, projPoint.getY() * PREC});
Expand All @@ -49,22 +57,31 @@ inline util::geo::I32Line parseLineString(const char* c, const char** endr) {

// _____________________________________________________________________________
// Parses the coordinate pair of a WKT POINT. c may point anywhere before the
// opening parenthesis; whitespace after '(' and between the two coordinates
// is skipped. The pair is passed through latLngToWebMerc() and scaled by
// PREC. Returns {0, 0} if no '(' is found, or if the first coordinate is not
// followed by whitespace before ')' or the end of the string.
inline util::geo::I32Point parsePoint(const char* c) {
  c = strchr(c, '(');
  // TODO: a parse error cannot be told apart from a point that maps to {0, 0}
  if (!c) return {0, 0};

  c += 1;
  // isspace() is undefined for negative char values, hence the cast
  while (*c && *c != ')' && isspace(static_cast<unsigned char>(*c))) c++;

  double x = util::atof(c, 10);

  // Walk to the end of the first coordinate. Scanning for any whitespace
  // character (not only ' ') accepts tabs and newlines as separators, and
  // stopping at ')' keeps the search inside this point's parentheses.
  const char* next = c;
  while (*next && *next != ')' && !isspace(static_cast<unsigned char>(*next)))
    next++;

  // no separator before ')' or end of input: second coordinate is missing
  if (!*next || *next == ')') return {0, 0};  // TODO: see above

  while (*next && *next != ')' && isspace(static_cast<unsigned char>(*next)))
    next++;

  double y = util::atof(next, 10);
  auto point = latLngToWebMerc(util::geo::DPoint(x, y));

  return {point.getX() * PREC, point.getY() * PREC};
}

// _____________________________________________________________________________
inline util::geo::I32Polygon parsePolygon(const char* c, const char** endr) {
c = strchr(c, '(');
if (!c) return {}; // parse error
c += 1;

size_t i = 0;
util::geo::I32Polygon poly;
while ((c = strchr(c, '('))) {
c++;
const char* end = 0;
const auto& line = parseLineString(c, &end);

Expand Down Expand Up @@ -96,9 +113,12 @@ inline util::geo::I32Polygon parsePolygon(const char* c, const char** endr) {
// _____________________________________________________________________________
inline util::geo::I32MultiLine parseMultiLineString(const char* c,
const char** endr) {
c = strchr(c, '(');
if (!c) return {}; // parse error
c += 1;

util::geo::I32MultiLine ml;
while ((c = strchr(c, '('))) {
c++;
const char* end = 0;
const auto& line = parseLineString(c, &end);
if (line.size() != 0) ml.push_back(std::move(line));
Expand All @@ -121,6 +141,10 @@ inline util::geo::I32MultiLine parseMultiLineString(const char* c,
// _____________________________________________________________________________
inline util::geo::I32MultiPolygon parseMultiPolygon(const char* c,
const char** endr) {
c = strchr(c, '(');
if (!c) return {}; // parse error
c += 1;

util::geo::I32MultiPolygon mp;
do {
c = strchr(c, '(');
Expand Down Expand Up @@ -198,42 +222,45 @@ inline void parseLine(char* c, size_t len, size_t gid, sj::Sweeper* sweeper,
{std::numeric_limits<int32_t>::max(),
std::numeric_limits<int32_t>::max()}),
id, subId, side, batch);
} else if (len > 6 && memcmp(c, "POINT(", 6) == 0) {
c += 6;
} else if (len > 5 && memcmp(c, "POINT", 5) == 0) {
c += 5;
const auto& point = parsePoint(c);
sweeper->add(point, id, side, batch);
} else if (len > 11 && memcmp(c, "MULTIPOINT(", 11) == 0) {
c += 11;
} else if (len > 10 && memcmp(c, "MULTIPOINT", 10) == 0) {
c += 10;
const auto& mp = util::geo::I32MultiPoint(parseLineString(c, 0));
if (mp.size() != 0) sweeper->add(mp, id, side, batch);
} else if (len > 11 && memcmp(c, "LINESTRING(", 11) == 0) {
c += 11;
} else if (len > 10 && memcmp(c, "LINESTRING", 10) == 0) {
c += 10;
const auto& line = parseLineString(c, 0);
if (line.size() > 1) sweeper->add(line, id, side, batch);
} else if (len > 16 && memcmp(c, "MULTILINESTRING(", 16) == 0) {
c += 16;
} else if (len > 15 && memcmp(c, "MULTILINESTRING", 15) == 0) {
c += 15;
const auto& ml = parseMultiLineString(c, 0);
sweeper->add(ml, id, side, batch);
} else if (len > 8 && memcmp(c, "POLYGON(", 8) == 0) {
} else if (len > 7 && memcmp(c, "POLYGON", 7) == 0) {
c += 7;
const auto& poly = parsePolygon(c, 0);
if (poly.getOuter().size() > 1) sweeper->add(poly, id, side, batch);
} else if (len > 13 && memcmp(c, "MULTIPOLYGON(", 13) == 0) {
c += 13;
} else if (len > 12 && memcmp(c, "MULTIPOLYGON", 12) == 0) {
c += 12;
const auto& mp = parseMultiPolygon(c, 0);
if (mp.size()) sweeper->add(mp, id, side, batch);
} else if (len > 19 && memcmp(c, "GEOMETRYCOLLECTION(", 19) == 0) {
} else if (len > 18 && memcmp(c, "GEOMETRYCOLLECTION", 18) == 0) {
c += 18;

c = strchr(c, '(');
if (!c) return; // parse error

util::geo::I32Collection col;
size_t numGeoms = 0;

do {
c++;
while (*c == ' ') c++; // skip possible whitespace

if (memcmp(c, "POINT(", 6) == 0) {
c += 6;
if (memcmp(c, "POINT", 5) == 0) {
c += 5;
const char* end = strchr(c, ')');
const auto& point = parsePoint(c);

Expand All @@ -242,7 +269,7 @@ inline void parseLine(char* c, size_t len, size_t gid, sj::Sweeper* sweeper,
col.push_back(point);
numGeoms++;
c = const_cast<char*>(strchr(end, ','));
} else if (memcmp(c, "POLYGON(", 8) == 0) {
} else if (memcmp(c, "POLYGON", 7) == 0) {
c += 7;
const char* end = 0;
const auto& poly = parsePolygon(c, &end);
Expand All @@ -253,8 +280,8 @@ inline void parseLine(char* c, size_t len, size_t gid, sj::Sweeper* sweeper,
numGeoms++;
}
c = const_cast<char*>(strchr(end, ','));
} else if (memcmp(c, "LINESTRING(", 11) == 0) {
c += 11;
} else if (memcmp(c, "LINESTRING", 10) == 0) {
c += 10;
const char* end = 0;
const auto& line = parseLineString(c, &end);

Expand All @@ -264,8 +291,8 @@ inline void parseLine(char* c, size_t len, size_t gid, sj::Sweeper* sweeper,
numGeoms++;
}
c = const_cast<char*>(strchr(end, ','));
} else if (memcmp(c, "MULTIPOINT(", 11) == 0) {
c += 11;
} else if (memcmp(c, "MULTIPOINT", 10) == 0) {
c += 10;
const char* end = 0;
const auto& line = parseLineString(c, &end);

Expand All @@ -275,8 +302,8 @@ inline void parseLine(char* c, size_t len, size_t gid, sj::Sweeper* sweeper,
numGeoms += line.size();
}
c = const_cast<char*>(strchr(end, ','));
} else if (memcmp(c, "MULTIPOLYGON(", 13) == 0) {
c += 13;
} else if (memcmp(c, "MULTIPOLYGON", 12) == 0) {
c += 12;
const char* end = 0;
const auto& mp = parseMultiPolygon(c, &end);

Expand All @@ -286,8 +313,9 @@ inline void parseLine(char* c, size_t len, size_t gid, sj::Sweeper* sweeper,
numGeoms += mp.size();
}
c = const_cast<char*>(strchr(end, ','));
} else if (memcmp(c, "MULTILINESTRING(", 16) == 0) {
c += 16;
} else if (memcmp(c, "MULTILINESTRING", 15) == 0) {
c += 15;

const char* end = 0;
const auto& ml = parseMultiLineString(c, &end);

Expand Down
46 changes: 23 additions & 23 deletions src/spatialjoin/tests/datasets/collectiontests
Original file line number Diff line number Diff line change
@@ -1,38 +1,38 @@
POINT(0.0010 0.0010)
GEOMETRYCOLLECTION( POINT( 0.0010 0.0010 ) )
GEOMETRYCOLLECTION ( POINT ( 0.0010 0.0010 ) )
MULTIPOINT(0.0010 0.0010, 0.0011 0.0011)
GEOMETRYCOLLECTION(MULTIPOINT(0.0010 0.0010, 0.0011 0.0011))
MULTIPOINT(0.0011 0.0011, 0.0010 0.0010)
LINESTRING(0.0010 0.0010, 0.0009 0.0009)
LINESTRING(0.0010 0.0010, 0.0009 0.0009, 0.0011 0.0011)
GEOMETRYCOLLECTION(POLYGON((0.00 0.00, 0.000 0.001, 0.001 0.001, 0.001 0.000, 0 0)))
MULTIPOINT (0.0011 0.0011, 0.0010 0.0010)
LINESTRING (0.0010 0.0010, 0.0009 0.0009)
LINESTRING (0.0010 0.0010, 0.0009 0.0009, 0.0011 0.0011)
GEOMETRYCOLLECTION ( POLYGON ( ( 0.00 0.00, 0.000 0.001, 0.001 0.001, 0.001 0.000, 0 0)))
GEOMETRYCOLLECTION(POLYGON((0.00 0.00, 0.000 0.001, -0.001 0.001, -0.001 0.000, 0 0)))
GEOMETRYCOLLECTION(POLYGON((0.00 0.00, 0.000 0.001, -0.001 0.001, -0.001 0.000, 0 0)), POLYGON((0.0005 0.00, 0.0005 0.001, 0.001 0.001, 0.001 0.0005, 00.0005 0)))
GEOMETRYCOLLECTION(POLYGON((0.00 0.00, 0.000 0.001, -0.001 0.001, -0.001 0.000, 0 0)) , POLYGON ((0.0005 0.00, 0.0005 0.001, 0.001 0.001, 0.001 0.0005, 00.0005 0)))
MULTIPOLYGON(((0.00 0.00, 0.000 0.001, 0.001 0.001, 0.001 0.000, 0 0)))
GEOMETRYCOLLECTION(MULTIPOINT(0.00 0.00, 0.0005 0.00))
GEOMETRYCOLLECTION(MULTIPOLYGON(((0.0005 0.00, 0.0005 0.001, 0.001 0.001, 0.001 0.0005, 00.0005 0)), ((0.00 0.00, 0.000 0.001, -0.001 0.001, -0.001 0.000, 0 0))))
GEOMETRYCOLLECTION (MULTIPOINT(0.00 0.00, 0.0005 0.00))
GEOMETRYCOLLECTION(MULTIPOLYGON(((0.0005 0.00, 0.0005 0.001, 0.001 0.001, 0.001 0.0005, 00.0005 0)) , ( (0.00 0.00, 0.000 0.001, -0.001 0.001, -0.001 0.000, 0 0))))
LINESTRING(0.0010 0.0010, 0.0009 0.0009)
MULTILINESTRING((0.005 0.005, 0.006 0.006))
MULTIPOINT(0.0010 0.0010, 0.005 0.005)
MULTILINESTRING((0.0010 0.0010, 0.0009 0.0009), (0.005 0.005, 0.006 0.006))
MULTILINESTRING((0.0010 0.0010, 0.0009 0.0009), (0.005 0.005, 0.006 0.006 ))
POINT(0.005 0.005)
GEOMETRYCOLLECTION(LINESTRING(0.005 0.005, 0.006 0.006))
GEOMETRYCOLLECTION( LINESTRING ( 0.005 0.005, 0.006 0.006))
GEOMETRYCOLLECTION( POLYGON((0.0005 0.0005, 0.0005 0.0015, 0.0015 0.001, 0.0015 0.0005, 0.0005 0.0005)))
MULTILINESTRING((0.0010 0.0011, 0.0009 0.0008), (0.005 0.006, 0.006 0.005))
GEOMETRYCOLLECTION(LINESTRING(0.0010 0.0011, 0.0009 0.0008), LINESTRING(0.0055 0.0055, 0.006 0.006))
GEOMETRYCOLLECTION(LINESTRING(0.0010 0.0011, 0.0009 0.0008), LINESTRING(0.0055 0.0055, 0.006 0.006))
MULTILINESTRING((0.0011 0.0011, 0.0009 0.0009))
MULTILINESTRING((0.0011 0.0011, 0.0007 0.0007), (0.0055 0.0055, 0.006 0.006))
MULTILINESTRING((0.0011 0.0011, 0.0008 0.0008))
MULTILINESTRING ((0.0011 0.0011, 0.0008 0.0008))
MULTILINESTRING((0.0011 0.0011, 0.0006 0.0006))
MULTILINESTRING((0.0011 0.0011, 0.0008 0.0008), (0.003 0.003, 0.005 0.003))
GEOMETRYCOLLECTION(MULTILINESTRING((0.0011 0.0011, 0.0007 0.0007), (0.003 0.003, 0.005 0.003)))
MULTIPOLYGON(((0.00 0.00, 0.000 0.001, 0.001 0.001, 0.001 0.000, 0 0)))
GEOMETRYCOLLECTION(POLYGON((0.00 0.00, 0.000 0.001, 0.001 0.001, 0.001 0.000, 0 0)), POLYGON((1.00 1.00, 1.000 1.001, 1.001 1.001, 1.001 1.000, 1 1)))
MULTIPOLYGON(((0.00 0.00, 0.000 0.001, 0.001 0.001, 0.001 0.000, 0 0)), ((2.00 2.00, 2.000 2.001, 2.001 2.001, 2.001 2.000, 2 2)))
GEOMETRYCOLLECTION (MULTILINESTRING ((0.0011 0.0011, 0.0007 0.0007), (0.003 0.003, 0.005 0.003)))
MULTIPOLYGON (((0.00 0.00, 0.000 0.001, 0.001 0.001, 0.001 0.000, 0 0)))
GEOMETRYCOLLECTION (POLYGON ((0.00 0.00, 0.000 0.001, 0.001 0.001, 0.001 0.000, 0 0)), POLYGON((1.00 1.00, 1.000 1.001, 1.001 1.001, 1.001 1.000, 1 1)))
MULTIPOLYGON ( ( ( 0.00 0.00, 0.000 0.001, 0.001 0.001, 0.001 0.000, 0 0)), ((2.00 2.00, 2.000 2.001, 2.001 2.001, 2.001 2.000, 2 2)))
MULTIPOLYGON()
LINESTRING(0.0005 0.0005)
POLYGON((0.0005 0.0005, 0.0005 0.0005), (), (), ())
POLYGON((0.0005 0.0005, 0.0005 0.0005), (0.0005 0.0005), (), ())
POLYGON ((0.0005 0.0005, 0.0005 0.0005), (0.0005 0.0005), (), ())
POLYGON((0.0005))
POINT()
POINT(diesistnichtkorrekt)
Expand All @@ -44,14 +44,14 @@ POLYGON((a), (b), (c), (d))
POLYGON()
MULTIPOLYGON((), (), ((), (), ()))
GEOMETRYCOLLECTION()
GEOMETRYCOLLECTION(POLYGON())
GEOMETRYCOLLECTION(POLYGON(),POLYGON())
GEOMETRYCOLLECTION( POLYGON() , POLYGON() )
GEOMETRYCOLLECTION( )
GEOMETRYCOLLECTION(,)
GEOMETRYCOLLECTION ( POLYGON())
GEOMETRYCOLLECTION ( POLYGON(),POLYGON())
GEOMETRYCOLLECTION ( POLYGON() , POLYGON() )
GEOMETRYCOLLECTION ( )
GEOMETRYCOLLECTION ( ,)

GEOMETRYCOLLECTION(MULTIPOLYGON(((0.00 0.00, 0.000 0.001, -0.001 0.001, -0.001 0.000, 0 0))), MULTIPOLYGON(((0.0005 0.00, 0.0005 0.001, 0.001 0.001, 0.001 0.0005, 00.0005 0))))
GEOMETRYCOLLECTION(MULTILINESTRING((0.0011 0.0011, 0.0007 0.0007)), MULTILINESTRING((0.003 0.003, 0.005 0.003)))
GEOMETRYCOLLECTION ( MULTILINESTRING((0.0011 0.0011, 0.0007 0.0007)), MULTILINESTRING((0.003 0.003, 0.005 0.003)))

GEOMETRYCOLLECTION(
GEOMETRYCOLLECTION(POLYGON(
Expand Down
14 changes: 7 additions & 7 deletions src/spatialjoin/tests/datasets/freiburg

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions src/spatialjoin/tests/datasets/multitests
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
POINT(0.0010 0.0010)
POINT(0.0010 0.0010)
MULTIPOINT(0.0010 0.0010, 0.0011 0.0011)
POINT (0.0010 0.0010)
MULTIPOINT (0.0010 0.0010, 0.0011 0.0011)
MULTIPOINT(0.0010 0.0010, 0.0011 0.0011)
MULTIPOINT(0.0011 0.0011, 0.0010 0.0010)
LINESTRING(0.0010 0.0010, 0.0009 0.0009)
LINESTRING (0.0010 0.0010, 0.0009 0.0009)
LINESTRING(0.0010 0.0010, 0.0009 0.0009, 0.0011 0.0011)
MULTIPOLYGON(((0.00 0.00, 0.000 0.001, 0.001 0.001, 0.001 0.000, 0 0)))
MULTIPOLYGON(((0.00 0.00, 0.000 0.001, -0.001 0.001, -0.001 0.000, 0 0)))
Expand All @@ -16,23 +16,23 @@ MULTILINESTRING((0.005 0.005, 0.006 0.006))
MULTIPOINT(0.0010 0.0010, 0.005 0.005)
MULTILINESTRING((0.0010 0.0010, 0.0009 0.0009), (0.005 0.005, 0.006 0.006))
POINT(0.005 0.005)
MULTILINESTRING((0.005 0.005, 0.006 0.006))
MULTILINESTRING ((0.005 0.005, 0.006 0.006))
MULTIPOLYGON(((0.0005 0.0005, 0.0005 0.0015, 0.0015 0.001, 0.0015 0.0005, 0.0005 0.0005)))
MULTILINESTRING((0.0010 0.0011, 0.0009 0.0008), (0.005 0.006, 0.006 0.005))
MULTILINESTRING((0.0010 0.0011, 0.0009 0.0008), (0.0055 0.0055, 0.006 0.006))
MULTILINESTRING ((0.0010 0.0011, 0.0009 0.0008), (0.0055 0.0055, 0.006 0.006))
MULTILINESTRING((0.0011 0.0011, 0.0009 0.0009))
MULTILINESTRING((0.0011 0.0011, 0.0007 0.0007), (0.0055 0.0055, 0.006 0.006))
MULTILINESTRING ((0.0011 0.0011, 0.0007 0.0007), (0.0055 0.0055, 0.006 0.006))
MULTILINESTRING((0.0011 0.0011, 0.0008 0.0008))
MULTILINESTRING((0.0011 0.0011, 0.0006 0.0006))
MULTILINESTRING ((0.0011 0.0011, 0.0006 0.0006))
MULTILINESTRING((0.0011 0.0011, 0.0008 0.0008), (0.003 0.003, 0.005 0.003))
MULTILINESTRING((0.0011 0.0011, 0.0007 0.0007), (0.003 0.003, 0.005 0.003))
MULTILINESTRING ((0.0011 0.0011, 0.0007 0.0007), (0.003 0.003, 0.005 0.003))
MULTIPOLYGON(((0.00 0.00, 0.000 0.001, 0.001 0.001, 0.001 0.000, 0 0)))
MULTIPOLYGON(((0.00 0.00, 0.000 0.001, 0.001 0.001, 0.001 0.000, 0 0)), ((1.00 1.00, 1.000 1.001, 1.001 1.001, 1.001 1.000, 1 1)))
MULTIPOLYGON(((0.00 0.00, 0.000 0.001, 0.001 0.001, 0.001 0.000, 0 0)), ((2.00 2.00, 2.000 2.001, 2.001 2.001, 2.001 2.000, 2 2)))
MULTIPOLYGON()
LINESTRING(0.0005 0.0005)
LINESTRING (0.0005 0.0005)
POLYGON((0.0005 0.0005, 0.0005 0.0005), (), (), ())
POLYGON((0.0005 0.0005, 0.0005 0.0005), (0.0005 0.0005), (), ())
POLYGON ((0.0005 0.0005, 0.0005 0.0005), (0.0005 0.0005), (), ())
POLYGON((0.0005))
POINT()
POINT(diesistnichtkorrekt)
Expand Down

0 comments on commit 191ac70

Please sign in to comment.