Skip to content

Commit

Permalink
(1) Use unique tmp file names, unlink them immediately, (2) Parsing s…
Browse files Browse the repository at this point in the history
…peedup: faster box id calculation, faster diag box calc, faster OBB calc, avoid unnecessary pre-calcs, (3) slightly pad OBB to avoid floating point precision issues when used as pre-filter, (4) avoid unecessary copying of output lines, output to stdout is now also buffered
  • Loading branch information
patrickbr committed May 17, 2024
1 parent c2711d3 commit 946a9a6
Show file tree
Hide file tree
Showing 8 changed files with 742 additions and 509 deletions.
135 changes: 90 additions & 45 deletions src/spatialjoin/BoxIds.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ const static double WORLD_H = 20037508.3427892 * PREC * 2;
const static double GRID_W = WORLD_W / NUM_GRID_CELLS;
const static double GRID_H = WORLD_H / NUM_GRID_CELLS;

const static double GRID_AREA = GRID_W * GRID_H;

typedef std::pair<int32_t, uint8_t> BoxId;

typedef std::vector<BoxId> BoxIdList;
Expand All @@ -43,62 +45,68 @@ inline int32_t getBoxId(const util::geo::I32Point& p) {
}

// ____________________________________________________________________________
inline BoxIdList getBoxIds(const util::geo::I32XSortedLine& line,
const util::geo::I32Box& envelope,
std::map<int32_t, size_t>* cutouts) {
int32_t startX = std::floor(
(1.0 * envelope.getLowerLeft().getX() + WORLD_W / 2.0) / GRID_W);
int32_t startY = std::floor(
(1.0 * envelope.getLowerLeft().getY() + WORLD_H / 2.0) / GRID_H);
inline void getBoxIds(const util::geo::I32XSortedLine& line,
const util::geo::I32Box& envelope, int xFrom, int xTo,
int yFrom, int yTo, int xWidth, int yHeight,
BoxIdList* ret, std::map<int32_t, size_t>* cutouts,
size_t startA, size_t startB) {

int32_t endX =
std::floor((1.0 * envelope.getUpperRight().getX() + WORLD_W / 2.0) /
GRID_W) +
1;
int32_t endY =
std::floor((1.0 * envelope.getUpperRight().getY() + WORLD_H / 2.0) /
GRID_H) +
1;
for (int32_t y = yFrom; y < yTo; y += yHeight) {
size_t firstInA = startA;
size_t firstInB = startB;

BoxIdList boxIds;
for (int32_t x = xFrom; x < xTo; x += xWidth) {
int localXWidth = std::min(xTo - x, xWidth);
int localYHeight = std::min(yTo - y, yHeight);

for (int32_t y = startY; y < endY; y++) {
for (int32_t x = startX; x < endX; x++) {
util::geo::I32Box box(
{(x * GRID_W - WORLD_W / 2), (y * GRID_H - WORLD_H / 2)},
{(x + 1) * GRID_W - WORLD_W / 2, (y + 1) * GRID_H - WORLD_H / 2});
{(x + localXWidth + 1) * GRID_W - WORLD_W / 2,
(y + localYHeight + 1) * GRID_H - WORLD_H / 2});

if (!util::geo::intersects(box, envelope)) continue;

const util::geo::I32XSortedPolygon boxPoly{util::geo::I32Polygon(box)};

size_t firstInA = 0;
size_t firstInB = 0;
auto check = util::geo::intersectsContainsCovers(
line, envelope, boxPoly, box, &firstInA, &firstInB);

if (std::get<0>(util::geo::intersectsContainsCovers(
line, envelope, boxPoly, box, &firstInA, &firstInB))) {
int32_t newId = y * NUM_GRID_CELLS + x + 1;
if (!boxIds.empty() && boxIds.back().second < 254 &&
boxIds.back().first + boxIds.back().second == newId - 1) {
boxIds.back().second++;
if (std::get<0>(check)) {
if (localXWidth == 1 && localYHeight == 1) {
int32_t newId = y * NUM_GRID_CELLS + x + 1;

if (cutouts) (*cutouts)[newId] = firstInB;

if (!ret->empty() && ret->back().second < 254 &&
ret->back().first - ret->back().second == newId + 1) {
ret->back().second++;
} else {
ret->push_back({newId, 0});
}
} else {
boxIds.push_back({newId, 0});
if (cutouts) (*cutouts)[newId] = firstInB;
// we need to check in detail on a smaller level!
// recurse down...
int newXWidth = (localXWidth + 1) / 2;
int newYHeight = (localYHeight + 1) / 2;

getBoxIds(line, envelope, x, x + localXWidth, y, y + localYHeight,
newXWidth, newYHeight, ret, cutouts, firstInA, firstInB);
}
}
}
}

return boxIds;
}

// ____________________________________________________________________________
inline void getBoxIds(const util::geo::I32XSortedPolygon& poly,
const util::geo::I32Polygon& polyr,
const util::geo::I32Box& envelope, double area, int xFrom,
int xTo, int yFrom, int yTo, int xWidth, int yHeight,
BoxIdList* ret, std::map<int32_t, size_t>* cutouts) {
BoxIdList* ret, std::map<int32_t, size_t>* cutouts,
size_t startA, size_t startB) {
for (int32_t y = yFrom; y < yTo; y += yHeight) {
size_t firstInA = startA;
size_t firstInB = startB;

for (int32_t x = xFrom; x < xTo; x += xWidth) {
int localXWidth = std::min(xTo - x, xWidth);
int localYHeight = std::min(yTo - y, yHeight);
Expand All @@ -108,18 +116,14 @@ inline void getBoxIds(const util::geo::I32XSortedPolygon& poly,
{(x + localXWidth + 1) * GRID_W - WORLD_W / 2,
(y + localYHeight + 1) * GRID_H - WORLD_H / 2});

if (!util::geo::intersects(box, envelope)) {
continue;
}
if (!util::geo::intersects(box, envelope)) continue;

const util::geo::I32XSortedPolygon boxPoly{util::geo::I32Polygon(box)};

size_t firstInA = 0;
size_t firstInB = 0;
double boxArea = GRID_AREA * (localXWidth + 1) * (localYHeight + 1);

auto check = util::geo::intersectsContainsCovers(
boxPoly, box, util::geo::area(box), poly, envelope, area, &firstInA,
&firstInB);
boxPoly, box, boxArea, poly, envelope, area, &firstInA, &firstInB);

if (std::get<1>(check)) {
// we can insert all at once
Expand Down Expand Up @@ -152,19 +156,54 @@ inline void getBoxIds(const util::geo::I32XSortedPolygon& poly,
int newXWidth = (localXWidth + 1) / 2;
int newYHeight = (localYHeight + 1) / 2;

getBoxIds(poly, polyr, envelope, area, x, x + localXWidth, y,
y + localYHeight, newXWidth, newYHeight, ret, cutouts);
getBoxIds(poly, envelope, area, x, x + localXWidth, y,
y + localYHeight, newXWidth, newYHeight, ret, cutouts,
firstInA, firstInB);
}
}
}
}
}

// ____________________________________________________________________________
inline BoxIdList getBoxIds(const util::geo::I32XSortedLine& line,
const util::geo::I32Box& envelope,
std::map<int32_t, size_t>* cutouts) {
int32_t a = getBoxId(envelope.getLowerLeft());
int32_t b = getBoxId(envelope.getUpperRight());
if (a == b) return {{a, 0}}; // shortcut

int32_t startX = std::floor(
(1.0 * envelope.getLowerLeft().getX() + WORLD_W / 2.0) / GRID_W);
int32_t startY = std::floor(
(1.0 * envelope.getLowerLeft().getY() + WORLD_H / 2.0) / GRID_H);

int32_t endX =
std::floor((1.0 * envelope.getUpperRight().getX() + WORLD_W / 2.0) /
GRID_W) +
1;
int32_t endY =
std::floor((1.0 * envelope.getUpperRight().getY() + WORLD_H / 2.0) /
GRID_H) +
1;

BoxIdList boxIds;

getBoxIds(line, envelope, startX, endX, startY, endY, (endX - startX + 3) / 4,
(endY - startY + 3) / 4, &boxIds, cutouts, 0, 0);
std::sort(boxIds.begin(), boxIds.end(), BoxIdCmp());

return boxIds;
}

// ____________________________________________________________________________
inline BoxIdList getBoxIds(const util::geo::I32XSortedPolygon& poly,
const util::geo::I32Polygon& polyr,
const util::geo::I32Box& envelope, double area,
std::map<int32_t, size_t>* cutouts) {
int32_t a = getBoxId(envelope.getLowerLeft());
int32_t b = getBoxId(envelope.getUpperRight());
if (a == b) return {{-a, 0}}; // shortcut

int32_t startX = std::floor(
(1.0 * envelope.getLowerLeft().getX() + WORLD_W / 2.0) / GRID_W);
int32_t startY = std::floor(
Expand All @@ -181,8 +220,9 @@ inline BoxIdList getBoxIds(const util::geo::I32XSortedPolygon& poly,

BoxIdList boxIds;

getBoxIds(poly, polyr, envelope, area, startX, endX, startY, endY,
(endX - startX + 3) / 4, (endY - startY + 3) / 4, &boxIds, cutouts);
getBoxIds(poly, envelope, area, startX, endX, startY, endY,
(endX - startX + 3) / 4, (endY - startY + 3) / 4, &boxIds, cutouts,
0, 0);
std::sort(boxIds.begin(), boxIds.end(), BoxIdCmp());

return boxIds;
Expand All @@ -193,6 +233,11 @@ inline BoxIdList packBoxIds(const BoxIdList& ids) {
if (ids.empty()) {
return {{0, 0}};
}

if (ids.size() == 1) {
return {{1, 0}, ids[0]};
}

// assume the list is sorted!

BoxIdList ret;
Expand Down
46 changes: 34 additions & 12 deletions src/spatialjoin/GeometryCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,7 @@ sj::Area sj::GeometryCache<sj::Area>::getFromDisk(size_t off,
// geom
readPoly(_geomsFReads[tid], ret.geom);

// TODO: careful, resize initializes entire vector!

// envelopes
// envelope
_geomsFReads[tid].read(reinterpret_cast<char*>(&ret.box),
sizeof(util::geo::I32Box));

Expand Down Expand Up @@ -212,9 +210,21 @@ sj::Area sj::GeometryCache<sj::Area>::getFromDisk(size_t off,
// simplified inner
readPoly(_geomsFReads[tid], ret.inner);

_geomsFReads[tid].read(reinterpret_cast<char*>(&ret.innerBox),
sizeof(util::geo::I32Box));

_geomsFReads[tid].read(reinterpret_cast<char*>(&ret.innerOuterArea),
sizeof(double));

// simplified outer
readPoly(_geomsFReads[tid], ret.outer);

_geomsFReads[tid].read(reinterpret_cast<char*>(&ret.outerBox),
sizeof(util::geo::I32Box));

_geomsFReads[tid].read(reinterpret_cast<char*>(&ret.outerOuterArea),
sizeof(double));

return ret;
}

Expand Down Expand Up @@ -327,7 +337,7 @@ size_t sj::GeometryCache<sj::Area>::add(const sj::Area& val) {
// geoms
writePoly(val.geom);

// envelopes
// envelope
_geomsF.write(reinterpret_cast<const char*>(&val.box),
sizeof(util::geo::I32Box));
_geomsOffset += sizeof(util::geo::I32Box);
Expand Down Expand Up @@ -381,9 +391,25 @@ size_t sj::GeometryCache<sj::Area>::add(const sj::Area& val) {
// innerGeom
writePoly(val.inner);

_geomsF.write(reinterpret_cast<const char*>(&val.innerBox),
sizeof(util::geo::I32Box));
_geomsOffset += sizeof(util::geo::I32Box);

// inner area
_geomsF.write(reinterpret_cast<const char*>(&val.innerOuterArea), sizeof(double));
_geomsOffset += sizeof(double);

// outerGeom
writePoly(val.outer);

_geomsF.write(reinterpret_cast<const char*>(&val.outerBox),
sizeof(util::geo::I32Box));
_geomsOffset += sizeof(util::geo::I32Box);

// outer area
_geomsF.write(reinterpret_cast<const char*>(&val.outerOuterArea), sizeof(double));
_geomsOffset += sizeof(double);

return ret;
}

Expand All @@ -394,10 +420,6 @@ void sj::GeometryCache<W>::flush() {
_geomsF.flush();
_geomsF.close();
}

for (size_t i = 0; i < _geomsFReads.size(); i++) {
_geomsFReads[i].open(getFName(), std::ios::in | std::ios::binary);
}
}

// ____________________________________________________________________________
Expand Down Expand Up @@ -548,25 +570,25 @@ void sj::GeometryCache<W>::writeLine(const util::geo::I32XSortedLine& geom) {
// ____________________________________________________________________________
template <>
std::string sj::GeometryCache<sj::Area>::getFName() const {
return _dir + "/areas";
return util::getTmpFName(_dir, ".spatialjoin", "areas");
}

// ____________________________________________________________________________
template <>
std::string sj::GeometryCache<sj::Line>::getFName() const {
return _dir + "/lines";
return util::getTmpFName(_dir, ".spatialjoin", "lines");
}

// ____________________________________________________________________________
template <>
std::string sj::GeometryCache<sj::Point>::getFName() const {
return _dir + "/points";
return util::getTmpFName(_dir, ".spatialjoin", "points");
}

// ____________________________________________________________________________
template <>
std::string sj::GeometryCache<sj::SimpleLine>::getFName() const {
return _dir + "/simplelines";
return util::getTmpFName(_dir, ".spatialjoin", "simplelines");
}

// ____________________________________________________________________________
Expand Down
Loading

0 comments on commit 946a9a6

Please sign in to comment.