Skip to content

Commit

Permalink
option for fast sweep skip, fsync sweep event file after sorting
Browse files Browse the repository at this point in the history
  • Loading branch information
patrickbr committed May 13, 2024
1 parent 3ccce6d commit 618d8f5
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 22 deletions.
9 changes: 7 additions & 2 deletions src/spatialjoin/SpatialJoinMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ void printHelp(int argc, char** argv) {
<< std::setw(41) << " --no-cutouts"
<< "disable cutouts\n"
<< std::setw(41) << " --no-diag-box"
<< "disable diagonal bounding-box based pre-filter" << std::endl;
<< "disable diagonal bounding-box based pre-filter\n"
<< std::setw(41) << " --no-fast-sweep-skip"
<< "disable fast sweep skip using binary search" << std::endl;
}

// _____________________________________________________________________________
Expand Down Expand Up @@ -103,6 +105,7 @@ int main(int argc, char** argv) {
bool useOBB = true;
bool useCutouts = true;
bool useDiagBox = true;
bool useFastSweepSkip = true;

for (int i = 1; i < argc; i++) {
std::string cur = argv[i];
Expand Down Expand Up @@ -147,6 +150,8 @@ int main(int argc, char** argv) {
useCutouts = false;
} else if (cur == "--no-diag-box") {
useDiagBox = false;
} else if (cur == "--no-fast-sweep-skip") {
useFastSweepSkip = false;
} else {
std::cerr << "Unknown option '" << cur << "', see -h" << std::endl;
exit(1);
Expand Down Expand Up @@ -208,7 +213,7 @@ int main(int argc, char** argv) {

Sweeper sweeper({NUM_THREADS, prefix, intersects, contains, covers, touches,
equals, overlaps, crosses, suffix, useBoxIds, useArea,
useOBB, useCutouts, useDiagBox},
useOBB, useCutouts, useDiagBox, useFastSweepSkip},
useCache, cache, output);

if (!useCache) {
Expand Down
41 changes: 26 additions & 15 deletions src/spatialjoin/Sweeper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ using sj::boxids::BoxIdList;
using sj::boxids::getBoxId;
using sj::boxids::getBoxIds;
using sj::boxids::packBoxIds;
using util::writeAll;
using util::geo::area;
using util::geo::getBoundingBox;
using util::geo::I32Line;
Expand Down Expand Up @@ -117,7 +118,7 @@ void Sweeper::multiAdd(const std::string& gid, int32_t xLeft, int32_t xRight) {
} else {
size_t id = _multiGidToId[gid];
if (xRight > _multiRightX[id]) _multiRightX[id] = xRight;
if (xLeft > _multiLeftX[id]) _multiLeftX[id] = xLeft;
if (xLeft < _multiLeftX[id]) _multiLeftX[id] = xLeft;
}
}

Expand All @@ -131,7 +132,16 @@ void Sweeper::add(const I32Polygon& poly, const std::string& gid,
size_t subid) {
const auto& box = getBoundingBox(poly);
const auto& hull = util::geo::convexHull(poly);
const I32XSortedPolygon spoly(poly);
I32XSortedPolygon spoly(poly);

if (!_cfg.useFastSweepSkip) {
spoly.setInnerMaxSegLen(std::numeric_limits<int32_t>::max());
spoly.getOuter().setMaxSegLen(std::numeric_limits<int32_t>::max());
for (auto& inner : spoly.getInners()) {
inner.setMaxSegLen(std::numeric_limits<int32_t>::max());
}
}

double areaSize = area(poly);
double outerAreaSize = outerArea(poly);
BoxIdList boxIds;
Expand Down Expand Up @@ -213,13 +223,15 @@ void Sweeper::add(const I32Line& line, const std::string& gid, size_t subid) {
size_t id = _simpleLineCache.add({line.front(), line.back(), gid});

diskAdd({id, box.getLowerLeft().getY(), box.getUpperRight().getY(),
box.getLowerLeft().getX(), false, SIMPLE_LINE, len,
box45});
box.getLowerLeft().getX(), false, SIMPLE_LINE, len, box45});
diskAdd({id, box.getLowerLeft().getY(), box.getUpperRight().getY(),
box.getUpperRight().getX(), true, SIMPLE_LINE, len,
box45});
box.getUpperRight().getX(), true, SIMPLE_LINE, len, box45});
} else {
const I32XSortedLine sline(line);
I32XSortedLine sline(line);

if (!_cfg.useFastSweepSkip) {
sline.setMaxSegLen(std::numeric_limits<int32_t>::max());
}

size_t id =
_lineCache.add({sline, box, gid, subid, len, boxIds, cutouts, obb});
Expand Down Expand Up @@ -429,21 +441,20 @@ void Sweeper::multiOut(size_t t, const std::string& gidA) {

// _____________________________________________________________________________
void Sweeper::flush() {
LOG(INFO) << _multiIds.size() << " multi geometries";
LOGTO(INFO, std::cerr) << _multiIds.size() << " multi geometries";
for (size_t i = 0; i < _multiIds.size(); i++) {
diskAdd({i, 1, 0, _multiLeftX[i] - 1, false, POINT, 0.0, {}});
}

ssize_t r = write(_file, _outBuffer, _obufpos);
if (r < 0) throw std::runtime_error("Could not write to file.");
writeAll(_file, _outBuffer, _obufpos);

_pointCache.flush();
_areaCache.flush();
_lineCache.flush();
_simpleLineCache.flush();

LOGTO(INFO, std::cerr) << "Sorting events...";

// now the individual parts are sorted
std::string newFName = _cache + "/.sortTmp";
int newFile = open(newFName.c_str(), O_RDWR | O_CREAT, 0666);

Expand All @@ -457,6 +468,8 @@ void Sweeper::flush() {
#endif
util::externalSort(_file, newFile, sizeof(BoxVal), _curSweepId, boxCmp);

fsync(newFile);

// remove old file
std::remove((_cache + "/events").c_str());
std::rename((_cache + "/.sortTmp").c_str(), (_cache + "/events").c_str());
Expand All @@ -476,8 +489,7 @@ void Sweeper::diskAdd(const BoxVal& bv) {
_obufpos += sizeof(BoxVal);

if (_obufpos >= BUFFER_S) {
ssize_t r = write(_file, _outBuffer, BUFFER_S);
if (r < 0) throw std::runtime_error("Could not write to file.");
writeAll(_file, _outBuffer, BUFFER_S);
_obufpos = 0;
}
_curSweepId++;
Expand Down Expand Up @@ -530,8 +542,7 @@ void Sweeper::sweep() {
_activeMultis.insert(cur->id);
} else if (!cur->out) {
// IN event
actives.insert({cur->loY, cur->upY},
{cur->id, cur->type, cur->b45});
actives.insert({cur->loY, cur->upY}, {cur->id, cur->type, cur->b45});

if (jj % 500000 == 0) {
auto lon = webMercToLatLng<double>((1.0 * cur->val) / PREC, 0).getX();
Expand Down
8 changes: 5 additions & 3 deletions src/spatialjoin/Sweeper.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,12 @@ struct SweeperCfg {
bool useOBB;
bool useCutouts;
bool useDiagBox;
bool useFastSweepSkip;
};

// buffer sizes _must_ be multiples of sizeof(BoxVal)
static const size_t BUFFER_S = sizeof(BoxVal) * 64 * 1024 * 1024;
// buffer size _must_ be a multiple of sizeof(BoxVal)
static const ssize_t BUFFER_S = sizeof(BoxVal) * 64 * 1024 * 1024;

static const size_t BUFFER_S_PAIRS = 1024 * 1024 * 10;

class Sweeper {
Expand Down Expand Up @@ -167,7 +169,7 @@ class Sweeper {
size_t _curSweepId = 0;
int _file;
unsigned char* _outBuffer;
size_t _obufpos;
ssize_t _obufpos;

OutMode _outMode;

Expand Down
2 changes: 1 addition & 1 deletion src/spatialjoin/tests/TestMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ using sj::Sweeper;
std::string fullRun(const std::string& file) {
Sweeper sweeper({1, "$", " intersects ", " contains ", " covers ",
" touches ", " equals ", " overlaps ", " crosses ", "$\n",
true, true, true, true, true},
true, true, true, true, true, true},
false, ".", ".resTmp");

size_t gid = 0;
Expand Down
2 changes: 1 addition & 1 deletion src/util
Submodule util updated 4 files
+12 −4 Misc.h
+54 −29 geo/Geo.h
+4 −4 geo/Line.h
+7 −7 geo/Polygon.h

0 comments on commit 618d8f5

Please sign in to comment.