diff --git a/src/spatialjoin/SpatialJoinMain.cpp b/src/spatialjoin/SpatialJoinMain.cpp index 9c8ba52..11f6d02 100755 --- a/src/spatialjoin/SpatialJoinMain.cpp +++ b/src/spatialjoin/SpatialJoinMain.cpp @@ -71,7 +71,9 @@ void printHelp(int argc, char** argv) { << std::setw(41) << " --no-cutouts" << "disable cutouts\n" << std::setw(41) << " --no-diag-box" - << "disable diagonal bounding-box based pre-filter" << std::endl; + << "disable diagonal bounding-box based pre-filter\n" + << std::setw(41) << " --no-fast-sweep-skip" + << "disable fast sweep skip using binary search" << std::endl; } // _____________________________________________________________________________ @@ -103,6 +105,7 @@ int main(int argc, char** argv) { bool useOBB = true; bool useCutouts = true; bool useDiagBox = true; + bool useFastSweepSkip = true; for (int i = 1; i < argc; i++) { std::string cur = argv[i]; @@ -147,6 +150,8 @@ int main(int argc, char** argv) { useCutouts = false; } else if (cur == "--no-diag-box") { useDiagBox = false; + } else if (cur == "--no-fast-sweep-skip") { + useFastSweepSkip = false; } else { std::cerr << "Unknown option '" << cur << "', see -h" << std::endl; exit(1); @@ -208,7 +213,7 @@ int main(int argc, char** argv) { Sweeper sweeper({NUM_THREADS, prefix, intersects, contains, covers, touches, equals, overlaps, crosses, suffix, useBoxIds, useArea, - useOBB, useCutouts, useDiagBox}, + useOBB, useCutouts, useDiagBox, useFastSweepSkip}, useCache, cache, output); if (!useCache) { diff --git a/src/spatialjoin/Sweeper.cpp b/src/spatialjoin/Sweeper.cpp index a5004e0..921ea12 100644 --- a/src/spatialjoin/Sweeper.cpp +++ b/src/spatialjoin/Sweeper.cpp @@ -24,6 +24,7 @@ using sj::boxids::BoxIdList; using sj::boxids::getBoxId; using sj::boxids::getBoxIds; using sj::boxids::packBoxIds; +using util::writeAll; using util::geo::area; using util::geo::getBoundingBox; using util::geo::I32Line; @@ -117,7 +118,7 @@ void Sweeper::multiAdd(const std::string& gid, int32_t xLeft, int32_t xRight) { } else { size_t id = _multiGidToId[gid]; if (xRight > _multiRightX[id]) _multiRightX[id] = xRight; - if (xLeft > _multiLeftX[id]) _multiLeftX[id] = xLeft; + if (xLeft < _multiLeftX[id]) _multiLeftX[id] = xLeft; } } @@ -131,7 +132,16 @@ void Sweeper::add(const I32Polygon& poly, const std::string& gid, size_t subid) { const auto& box = getBoundingBox(poly); const auto& hull = util::geo::convexHull(poly); - const I32XSortedPolygon spoly(poly); + I32XSortedPolygon spoly(poly); + + if (!_cfg.useFastSweepSkip) { + spoly.setInnerMaxSegLen(std::numeric_limits::max()); + spoly.getOuter().setMaxSegLen(std::numeric_limits::max()); + for (auto& inner : spoly.getInners()) { + inner.setMaxSegLen(std::numeric_limits::max()); + } + } + double areaSize = area(poly); double outerAreaSize = outerArea(poly); BoxIdList boxIds; @@ -213,13 +223,15 @@ void Sweeper::add(const I32Line& line, const std::string& gid, size_t subid) { size_t id = _simpleLineCache.add({line.front(), line.back(), gid}); diskAdd({id, box.getLowerLeft().getY(), box.getUpperRight().getY(), - box.getLowerLeft().getX(), false, SIMPLE_LINE, len, - box45}); + box.getLowerLeft().getX(), false, SIMPLE_LINE, len, box45}); diskAdd({id, box.getLowerLeft().getY(), box.getUpperRight().getY(), - box.getUpperRight().getX(), true, SIMPLE_LINE, len, - box45}); + box.getUpperRight().getX(), true, SIMPLE_LINE, len, box45}); } else { - const I32XSortedLine sline(line); + I32XSortedLine sline(line); + + if (!_cfg.useFastSweepSkip) { + sline.setMaxSegLen(std::numeric_limits::max()); + } size_t id = _lineCache.add({sline, box, gid, subid, len, boxIds, cutouts, obb}); @@ -429,21 +441,20 @@ void Sweeper::multiOut(size_t t, const std::string& gidA) { // _____________________________________________________________________________ void Sweeper::flush() { - LOG(INFO) << _multiIds.size() << " multi geometries"; + LOGTO(INFO, std::cerr) << _multiIds.size() << " multi geometries"; for (size_t i = 0; i < _multiIds.size(); i++) { diskAdd({i, 1, 0, _multiLeftX[i] - 1, false, POINT, 0.0, {}}); } - ssize_t r = write(_file, _outBuffer, _obufpos); - if (r < 0) throw std::runtime_error("Could not write to file."); + writeAll(_file, _outBuffer, _obufpos); _pointCache.flush(); _areaCache.flush(); _lineCache.flush(); _simpleLineCache.flush(); + LOGTO(INFO, std::cerr) << "Sorting events..."; - // now the individial parts are sorted std::string newFName = _cache + "/.sortTmp"; int newFile = open(newFName.c_str(), O_RDWR | O_CREAT, 0666); @@ -457,6 +468,8 @@ void Sweeper::flush() { #endif util::externalSort(_file, newFile, sizeof(BoxVal), _curSweepId, boxCmp); + fsync(newFile); + // remove old file std::remove((_cache + "/events").c_str()); std::rename((_cache + "/.sortTmp").c_str(), (_cache + "/events").c_str()); @@ -476,8 +489,7 @@ void Sweeper::diskAdd(const BoxVal& bv) { _obufpos += sizeof(BoxVal); if (_obufpos >= BUFFER_S) { - ssize_t r = write(_file, _outBuffer, BUFFER_S); - if (r < 0) throw std::runtime_error("Could not write to file."); + writeAll(_file, _outBuffer, BUFFER_S); _obufpos = 0; } _curSweepId++; @@ -530,8 +542,7 @@ void Sweeper::sweep() { _activeMultis.insert(cur->id); } else if (!cur->out) { // IN event - actives.insert({cur->loY, cur->upY}, - {cur->id, cur->type, cur->b45}); + actives.insert({cur->loY, cur->upY}, {cur->id, cur->type, cur->b45}); if (jj % 500000 == 0) { auto lon = webMercToLatLng((1.0 * cur->val) / PREC, 0).getX(); diff --git a/src/spatialjoin/Sweeper.h b/src/spatialjoin/Sweeper.h index 622ce3c..0556723 100644 --- a/src/spatialjoin/Sweeper.h +++ b/src/spatialjoin/Sweeper.h @@ -84,10 +84,12 @@ struct SweeperCfg { bool useOBB; bool useCutouts; bool useDiagBox; + bool useFastSweepSkip; }; -// buffer sizes _must_ be multiples of sizeof(BoxVal) -static const size_t BUFFER_S = sizeof(BoxVal) * 64 * 1024 * 1024; +// buffer size _must_ be multiples of sizeof(BoxVal) +static const ssize_t BUFFER_S = sizeof(BoxVal) * 64 * 1024 * 1024; + static const size_t BUFFER_S_PAIRS = 1024 * 1024 * 10; class Sweeper { @@ -167,7 +169,7 @@ class Sweeper { size_t _curSweepId = 0; int _file; unsigned char* _outBuffer; - size_t _obufpos; + ssize_t _obufpos; OutMode _outMode; diff --git a/src/spatialjoin/tests/TestMain.cpp b/src/spatialjoin/tests/TestMain.cpp index 69dbf84..4f4e981 100644 --- a/src/spatialjoin/tests/TestMain.cpp +++ b/src/spatialjoin/tests/TestMain.cpp @@ -17,7 +17,7 @@ using sj::Sweeper; std::string fullRun(const std::string& file) { Sweeper sweeper({1, "$", " intersects ", " contains ", " covers ", " touches ", " equals ", " overlaps ", " crosses ", "$\n", - true, true, true, true, true}, + true, true, true, true, true, true}, false, ".", ".resTmp"); size_t gid = 0; diff --git a/src/util b/src/util index bbe6425..a814647 160000 --- a/src/util +++ b/src/util @@ -1 +1 @@ -Subproject commit bbe642559cf9b8d4a3625a2411243d80d4b39120 +Subproject commit a8146475fb1cbeb0308fb2fccd2d046f5c6618c7