Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ntuple] Fixes lookup & searching in the descriptor #17004

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
15 changes: 14 additions & 1 deletion tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,10 @@ public:
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element,
std::size_t pageSize);

/// Has the same length as fPageInfos and stores the sum of the number of elements of all the pages
/// up to and including a given index. Used for binary search in Find().
std::vector<NTupleSize_t> fCumulativeNElements;

public:
/// We do not need to store the element size / uncompressed page size because we know to which column
/// the page belongs
Expand Down Expand Up @@ -319,6 +323,7 @@ public:
RPageRange clone;
clone.fPhysicalColumnId = fPhysicalColumnId;
clone.fPageInfos = fPageInfos;
clone.fCumulativeNElements = fCumulativeNElements;
return clone;
}

Expand Down Expand Up @@ -427,6 +432,7 @@ class RClusterGroupDescriptor {
private:
DescriptorId_t fClusterGroupId = kInvalidDescriptorId;
/// The cluster IDs can be empty if the corresponding page list is not loaded.
/// Otherwise, cluster ids are sorted by first entry number.
std::vector<DescriptorId_t> fClusterIds;
/// The page list that corresponds to the cluster group
RNTupleLocator fPageListLocator;
Expand Down Expand Up @@ -561,12 +567,19 @@ private:
std::unordered_map<DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
std::unordered_map<DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
std::unordered_map<DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
/// References cluster groups sorted by entry range and thus allows for binary search.
/// Note that this list is empty during the descriptor building process and will only be
/// created when the final descriptor is extracted from the builder.
std::vector<DescriptorId_t> fSortedClusterGroupIds;
/// May contain only a subset of all the available clusters, e.g. the clusters of the current file
/// from a chain of files
std::unordered_map<DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
std::vector<RExtraTypeInfoDescriptor> fExtraTypeInfoDescriptors;
std::unique_ptr<RHeaderExtension> fHeaderExtension;

// We don't expose this publicly because when we add sharded clusters, this interface does not make sense anymore
DescriptorId_t FindClusterId(NTupleSize_t entryIdx) const;

public:
static constexpr unsigned int kFeatureFlagTest = 137; // Bit reserved for forward-compatibility testing

Expand Down Expand Up @@ -1302,7 +1315,7 @@ public:
fClusterGroup.fNClusters = nClusters;
return *this;
}
void AddClusters(const std::vector<DescriptorId_t> &clusterIds)
void AddSortedClusters(const std::vector<DescriptorId_t> &clusterIds)
{
if (clusterIds.size() != fClusterGroup.GetNClusters())
throw RException(R__FAIL("mismatch of number of clusters"));
Expand Down
188 changes: 154 additions & 34 deletions tree/ntuple/v7/src/RNTupleDescriptor.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -179,21 +179,32 @@ ROOT::Experimental::RColumnDescriptor ROOT::Experimental::RColumnDescriptor::Clo
ROOT::Experimental::RClusterDescriptor::RPageRange::RPageInfoExtended
ROOT::Experimental::RClusterDescriptor::RPageRange::Find(ClusterSize_t::ValueType idxInCluster) const
{
// TODO(jblomer): binary search
RPageInfo pageInfo;
decltype(idxInCluster) firstInPage = 0;
NTupleSize_t pageNo = 0;
for (const auto &pi : fPageInfos) {
if (firstInPage + pi.fNElements > idxInCluster) {
pageInfo = pi;
break;
const auto N = fCumulativeNElements.size();
R__ASSERT(N > 0);
R__ASSERT(N == fPageInfos.size());

std::size_t left = 0;
std::size_t right = N - 1;
std::size_t midpoint = N;
while (left <= right) {
midpoint = (left + right) / 2;
if (fCumulativeNElements[midpoint] <= idxInCluster) {
left = midpoint + 1;
continue;
}
firstInPage += pi.fNElements;
++pageNo;

if ((midpoint == 0) || (fCumulativeNElements[midpoint - 1] <= idxInCluster))
break;

right = midpoint - 1;
}
R__ASSERT(midpoint < N);

auto pageInfo = fPageInfos[midpoint];
decltype(idxInCluster) firstInPage = (midpoint == 0) ? 0 : fCumulativeNElements[midpoint - 1];
R__ASSERT(firstInPage <= idxInCluster);
R__ASSERT((firstInPage + pageInfo.fNElements) > idxInCluster);
return RPageInfoExtended{pageInfo, firstInPage, pageNo};
return RPageInfoExtended{pageInfo, firstInPage, midpoint};
}

std::size_t
Expand Down Expand Up @@ -372,42 +383,129 @@ ROOT::Experimental::RNTupleDescriptor::FindPhysicalColumnId(DescriptorId_t field
ROOT::Experimental::DescriptorId_t
ROOT::Experimental::RNTupleDescriptor::FindClusterId(DescriptorId_t physicalColumnId, NTupleSize_t index) const
{
// TODO(jblomer): binary search?
for (const auto &cd : fClusterDescriptors) {
if (!cd.second.ContainsColumn(physicalColumnId))
if (GetNClusterGroups() == 0)
return kInvalidDescriptorId;

// Binary search in the cluster group list, followed by a binary search in the clusters of that cluster group

std::size_t cgLeft = 0;
std::size_t cgRight = GetNClusterGroups() - 1;
while (cgLeft <= cgRight) {
const std::size_t cgMidpoint = (cgLeft + cgRight) / 2;
const auto &clusterIds = GetClusterGroupDescriptor(fSortedClusterGroupIds[cgMidpoint]).GetClusterIds();
R__ASSERT(!clusterIds.empty());
Comment on lines +395 to +396
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unrelated (for now): this requires deserializing all page lists to populate all cluster group descriptors. In the future, we may first want to search loaded cluster groups under the assumption that by loading the (global) entry first, we already have the necessary information...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed. When we implement partial loading of page lists, we need to modify this, e.g. to first look into the available page lists and then load the remaining ones or so.


const auto firstElementInGroup =
GetClusterDescriptor(clusterIds.front()).GetColumnRange(physicalColumnId).fFirstElementIndex;
if (firstElementInGroup > index) {
// Look into the lower half of cluster groups
R__ASSERT(cgMidpoint > 0);
cgRight = cgMidpoint - 1;
continue;
auto columnRange = cd.second.GetColumnRange(physicalColumnId);
if (columnRange.Contains(index))
return cd.second.GetId();
}

const auto &lastColumnRange = GetClusterDescriptor(clusterIds.back()).GetColumnRange(physicalColumnId);
if ((lastColumnRange.fFirstElementIndex + lastColumnRange.fNElements) <= index) {
// Look into the upper half of cluster groups
cgLeft = cgMidpoint + 1;
continue;
}

// Binary search in the current cluster group; since we already checked the element range boundaries,
// the element must be in that cluster group.
std::size_t clusterLeft = 0;
std::size_t clusterRight = clusterIds.size() - 1;
while (clusterLeft <= clusterRight) {
const std::size_t clusterMidpoint = (clusterLeft + clusterRight) / 2;
const auto clusterId = clusterIds[clusterMidpoint];
const auto &columnRange = GetClusterDescriptor(clusterId).GetColumnRange(physicalColumnId);

if (columnRange.Contains(index))
return clusterId;

if (columnRange.fFirstElementIndex > index) {
R__ASSERT(clusterMidpoint > 0);
clusterRight = clusterMidpoint - 1;
continue;
}

if (columnRange.fFirstElementIndex + columnRange.fNElements <= index) {
clusterLeft = clusterMidpoint + 1;
continue;
}
}
R__ASSERT(false);
}
return kInvalidDescriptorId;
}

/// Locates the cluster that contains the given (global) entry index.
/// Returns kInvalidDescriptorId if no loaded cluster covers entryIdx (e.g. the index is
/// past the last entry or the corresponding page list has not been loaded).
/// Relies on fSortedClusterGroupIds being ordered by entry range and on the cluster IDs
/// inside each group being sorted by first entry number.
ROOT::Experimental::DescriptorId_t ROOT::Experimental::RNTupleDescriptor::FindClusterId(NTupleSize_t entryIdx) const
{
   if (GetNClusterGroups() == 0)
      return kInvalidDescriptorId;

   // Binary search in the cluster group list, followed by a binary search in the clusters of that cluster group

   std::size_t cgLeft = 0;
   std::size_t cgRight = GetNClusterGroups() - 1;
   while (cgLeft <= cgRight) {
      const std::size_t cgMidpoint = (cgLeft + cgRight) / 2;
      const auto &cgDesc = GetClusterGroupDescriptor(fSortedClusterGroupIds[cgMidpoint]);

      if (cgDesc.GetMinEntry() > entryIdx) {
         // The assertion guards the std::size_t underflow of `cgMidpoint - 1`: the first group
         // starts at entry 0, so a search that lands left of group 0 indicates corrupted metadata.
         R__ASSERT(cgMidpoint > 0);
         cgRight = cgMidpoint - 1;
         continue;
      }

      if (cgDesc.GetMinEntry() + cgDesc.GetEntrySpan() <= entryIdx) {
         cgLeft = cgMidpoint + 1;
         continue;
      }

      // Binary search in the current cluster group; since we already checked that the group's
      // entry range contains entryIdx, the matching cluster must be in that cluster group.
      const auto &clusterIds = cgDesc.GetClusterIds();
      R__ASSERT(!clusterIds.empty());
      std::size_t clusterLeft = 0;
      std::size_t clusterRight = clusterIds.size() - 1;
      while (clusterLeft <= clusterRight) {
         const std::size_t clusterMidpoint = (clusterLeft + clusterRight) / 2;
         const auto &clusterDesc = GetClusterDescriptor(clusterIds[clusterMidpoint]);

         if (clusterDesc.GetFirstEntryIndex() > entryIdx) {
            // Same underflow guard as above; the group's first cluster starts at the group's min entry.
            R__ASSERT(clusterMidpoint > 0);
            clusterRight = clusterMidpoint - 1;
            continue;
         }

         if (clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries() <= entryIdx) {
            clusterLeft = clusterMidpoint + 1;
            continue;
         }

         return clusterIds[clusterMidpoint];
      }
      // Unreachable: the outer loop established that this group contains entryIdx, and the
      // clusters of a group tile its entry range without gaps.
      R__ASSERT(false);
   }
   return kInvalidDescriptorId;
}

// TODO(jblomer): fix for cases of sharded clusters
ROOT::Experimental::DescriptorId_t
ROOT::Experimental::RNTupleDescriptor::FindNextClusterId(DescriptorId_t clusterId) const
{
const auto &clusterDesc = GetClusterDescriptor(clusterId);
auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
// TODO(jblomer): binary search?
for (const auto &cd : fClusterDescriptors) {
if (cd.second.GetFirstEntryIndex() == firstEntryInNextCluster)
return cd.second.GetId();
}
return kInvalidDescriptorId;
const auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
return FindClusterId(firstEntryInNextCluster);
Comment on lines +498 to +499
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth shortcutting the common case here and check if clusterId + 1 contains firstEntryInNextCluster?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good question. The "problem" is that I think currently that shortcut will always trigger. However, I also don't want to rely on descriptor ID ordering... I need to think about it.

}

// TODO(jblomer): fix for cases of sharded clusters
ROOT::Experimental::DescriptorId_t
ROOT::Experimental::RNTupleDescriptor::FindPrevClusterId(DescriptorId_t clusterId) const
{
const auto &clusterDesc = GetClusterDescriptor(clusterId);
// TODO(jblomer): binary search?
for (const auto &cd : fClusterDescriptors) {
if (cd.second.GetFirstEntryIndex() + cd.second.GetNEntries() == clusterDesc.GetFirstEntryIndex())
return cd.second.GetId();
}
return kInvalidDescriptorId;
if (clusterDesc.GetFirstEntryIndex() == 0)
return kInvalidDescriptorId;
return FindClusterId(clusterDesc.GetFirstEntryIndex() - 1);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same question here

}

std::vector<ROOT::Experimental::DescriptorId_t>
Expand Down Expand Up @@ -489,8 +587,13 @@ ROOT::Experimental::RNTupleDescriptor::AddClusterGroupDetails(DescriptorId_t clu
return R__FAIL("invalid attempt to re-populate existing cluster");
}
}
std::sort(clusterIds.begin(), clusterIds.end(),
[this](DescriptorId_t a, DescriptorId_t b)
{
return fClusterDescriptors[a].GetFirstEntryIndex() < fClusterDescriptors[b].GetFirstEntryIndex();
});
auto cgBuilder = Internal::RClusterGroupDescriptorBuilder::FromSummary(iter->second);
cgBuilder.AddClusters(clusterIds);
cgBuilder.AddSortedClusters(clusterIds);
iter->second = cgBuilder.MoveDescriptor().Unwrap();
return RResult<void>::Success();
}
Expand Down Expand Up @@ -554,6 +657,7 @@ std::unique_ptr<ROOT::Experimental::RNTupleDescriptor> ROOT::Experimental::RNTup
clone->fColumnDescriptors.emplace(d.first, d.second.Clone());
for (const auto &d : fClusterGroupDescriptors)
clone->fClusterGroupDescriptors.emplace(d.first, d.second.Clone());
clone->fSortedClusterGroupIds = fSortedClusterGroupIds;
for (const auto &d : fClusterDescriptors)
clone->fClusterDescriptors.emplace(d.first, d.second.Clone());
for (const auto &d : fExtraTypeInfoDescriptors)
Expand Down Expand Up @@ -731,10 +835,17 @@ ROOT::Experimental::Internal::RClusterDescriptorBuilder::MoveDescriptor()
return R__FAIL("unset cluster ID");
if (fCluster.fNEntries == 0)
return R__FAIL("empty cluster");
for (const auto &pr : fCluster.fPageRanges) {
for (auto &pr : fCluster.fPageRanges) {
if (fCluster.fColumnRanges.count(pr.first) == 0) {
return R__FAIL("missing column range");
}
pr.second.fCumulativeNElements.clear();
pr.second.fCumulativeNElements.reserve(pr.second.fPageInfos.size());
NTupleSize_t sum = 0;
for (const auto &pi : pr.second.fPageInfos) {
sum += pi.fNElements;
pr.second.fCumulativeNElements.emplace_back(sum);
}
}
RClusterDescriptor result;
std::swap(result, fCluster);
Expand Down Expand Up @@ -827,6 +938,15 @@ ROOT::Experimental::RResult<void> ROOT::Experimental::Internal::RNTupleDescripto
ROOT::Experimental::RNTupleDescriptor ROOT::Experimental::Internal::RNTupleDescriptorBuilder::MoveDescriptor()
{
EnsureValidDescriptor().ThrowOnError();
fDescriptor.fSortedClusterGroupIds.reserve(fDescriptor.fClusterGroupDescriptors.size());
for (const auto &[id, _] : fDescriptor.fClusterGroupDescriptors)
fDescriptor.fSortedClusterGroupIds.emplace_back(id);
std::sort(fDescriptor.fSortedClusterGroupIds.begin(), fDescriptor.fSortedClusterGroupIds.end(),
[this](DescriptorId_t a, DescriptorId_t b)
{
return fDescriptor.fClusterGroupDescriptors[a].GetMinEntry() <
fDescriptor.fClusterGroupDescriptors[b].GetMinEntry();
});
RNTupleDescriptor result;
std::swap(result, fDescriptor);
return result;
Expand Down
2 changes: 1 addition & 1 deletion tree/ntuple/v7/src/RPageStorage.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1160,7 +1160,7 @@ void ROOT::Experimental::Internal::RPagePersistentSink::CommitClusterGroup()
for (auto i = fNextClusterInGroup; i < nClusters; ++i) {
clusterIds.emplace_back(i);
}
cgBuilder.AddClusters(clusterIds);
cgBuilder.AddSortedClusters(clusterIds);
fDescriptorBuilder.AddClusterGroup(cgBuilder.MoveDescriptor().Unwrap());
fSerializationContext.MapClusterGroupId(clusterGroupId);

Expand Down
10 changes: 4 additions & 6 deletions tree/ntuple/v7/test/ntuple_cluster.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,10 @@ class RPageSourceMock : public RPageSource {
.MoveDescriptor()
.Unwrap());
}
descBuilder.AddClusterGroup(ROOT::Experimental::Internal::RClusterGroupDescriptorBuilder()
.ClusterGroupId(0)
.MinEntry(0)
.EntrySpan(6)
.MoveDescriptor()
.Unwrap());
ROOT::Experimental::Internal::RClusterGroupDescriptorBuilder cgBuilder;
cgBuilder.ClusterGroupId(0).MinEntry(0).EntrySpan(6).NClusters(6);
cgBuilder.AddSortedClusters({0, 1, 2, 3, 4, 5});
descBuilder.AddClusterGroup(cgBuilder.MoveDescriptor().Unwrap());
auto descriptorGuard = GetExclDescriptorGuard();
descriptorGuard.MoveIn(descBuilder.MoveDescriptor());
}
Expand Down
Loading
Loading