Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ntuple] Fixes lookup & searching in the descriptor #17004

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
15 changes: 14 additions & 1 deletion tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,10 @@ public:
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element,
std::size_t pageSize);

/// Has the same length as fPageInfos and stores the sum of the number of elements of all the pages
/// up to and including a given index. Used for binary search in Find().
std::vector<NTupleSize_t> fCumulativeNElements;

public:
/// We do not need to store the element size / uncompressed page size because we know to which column
/// the page belongs
Expand Down Expand Up @@ -319,6 +323,7 @@ public:
RPageRange clone;
clone.fPhysicalColumnId = fPhysicalColumnId;
clone.fPageInfos = fPageInfos;
clone.fCumulativeNElements = fCumulativeNElements;
return clone;
}

Expand Down Expand Up @@ -427,6 +432,7 @@ class RClusterGroupDescriptor {
private:
DescriptorId_t fClusterGroupId = kInvalidDescriptorId;
/// The cluster IDs can be empty if the corresponding page list is not loaded.
/// Otherwise, cluster ids are sorted by first entry number.
std::vector<DescriptorId_t> fClusterIds;
/// The page list that corresponds to the cluster group
RNTupleLocator fPageListLocator;
Expand Down Expand Up @@ -561,12 +567,19 @@ private:
std::unordered_map<DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
std::unordered_map<DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
std::unordered_map<DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
/// References cluster groups sorted by entry range and thus allows for binary search.
/// Note that this list is empty during the descriptor building process and will only be
/// created when the final descriptor is extracted from the builder.
std::vector<DescriptorId_t> fSortedClusterGroupIds;
/// May contain only a subset of all the available clusters, e.g. the clusters of the current file
/// from a chain of files
std::unordered_map<DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
std::vector<RExtraTypeInfoDescriptor> fExtraTypeInfoDescriptors;
std::unique_ptr<RHeaderExtension> fHeaderExtension;

// We don't expose this publicly because when we add sharded clusters, this interface does not make sense anymore
DescriptorId_t FindClusterId(NTupleSize_t entryIdx) const;

public:
static constexpr unsigned int kFeatureFlagTest = 137; // Bit reserved for forward-compatibility testing

Expand Down Expand Up @@ -1302,7 +1315,7 @@ public:
fClusterGroup.fNClusters = nClusters;
return *this;
}
void AddClusters(const std::vector<DescriptorId_t> &clusterIds)
void AddSortedClusters(const std::vector<DescriptorId_t> &clusterIds)
{
if (clusterIds.size() != fClusterGroup.GetNClusters())
throw RException(R__FAIL("mismatch of number of clusters"));
Expand Down
188 changes: 154 additions & 34 deletions tree/ntuple/v7/src/RNTupleDescriptor.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -179,21 +179,32 @@ ROOT::Experimental::RColumnDescriptor ROOT::Experimental::RColumnDescriptor::Clo
ROOT::Experimental::RClusterDescriptor::RPageRange::RPageInfoExtended
ROOT::Experimental::RClusterDescriptor::RPageRange::Find(ClusterSize_t::ValueType idxInCluster) const
{
// TODO(jblomer): binary search
RPageInfo pageInfo;
decltype(idxInCluster) firstInPage = 0;
NTupleSize_t pageNo = 0;
for (const auto &pi : fPageInfos) {
if (firstInPage + pi.fNElements > idxInCluster) {
pageInfo = pi;
break;
const auto N = fCumulativeNElements.size();
R__ASSERT(N > 0);
R__ASSERT(N == fPageInfos.size());

std::size_t left = 0;
std::size_t right = N - 1;
std::size_t midpoint = N;
while (left <= right) {
midpoint = (left + right) / 2;
if (fCumulativeNElements[midpoint] <= idxInCluster) {
left = midpoint + 1;
continue;
}
firstInPage += pi.fNElements;
++pageNo;

if ((midpoint == 0) || (fCumulativeNElements[midpoint - 1] <= idxInCluster))
break;

right = midpoint - 1;
}
R__ASSERT(midpoint < N);

auto pageInfo = fPageInfos[midpoint];
decltype(idxInCluster) firstInPage = (midpoint == 0) ? 0 : fCumulativeNElements[midpoint - 1];
R__ASSERT(firstInPage <= idxInCluster);
R__ASSERT((firstInPage + pageInfo.fNElements) > idxInCluster);
return RPageInfoExtended{pageInfo, firstInPage, pageNo};
return RPageInfoExtended{pageInfo, firstInPage, midpoint};
}

std::size_t
Expand Down Expand Up @@ -372,42 +383,129 @@ ROOT::Experimental::RNTupleDescriptor::FindPhysicalColumnId(DescriptorId_t field
ROOT::Experimental::DescriptorId_t
ROOT::Experimental::RNTupleDescriptor::FindClusterId(DescriptorId_t physicalColumnId, NTupleSize_t index) const
{
// TODO(jblomer): binary search?
for (const auto &cd : fClusterDescriptors) {
if (!cd.second.ContainsColumn(physicalColumnId))
if (GetNClusterGroups() == 0)
return kInvalidDescriptorId;

// Binary search in the cluster group list, followed by a binary search in the clusters of that cluster group

std::size_t cgLeft = 0;
std::size_t cgRight = GetNClusterGroups() - 1;
while (cgLeft <= cgRight) {
const std::size_t cgMidpoint = (cgLeft + cgRight) / 2;
const auto &clusterIds = GetClusterGroupDescriptor(fSortedClusterGroupIds[cgMidpoint]).GetClusterIds();
R__ASSERT(!clusterIds.empty());
Comment on lines +395 to +396
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unrelated (for now): this requires deserializing all page lists to populate all cluster group descriptors. In the future, we may first want to search loaded cluster groups under the assumption that by loading the (global) entry first, we already have the necessary information...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed. When we implement partial loading of page lists, we need to modify this, e.g. to first look into the available page lists and then load the remaining ones or so.


const auto firstElementInGroup =
GetClusterDescriptor(clusterIds.front()).GetColumnRange(physicalColumnId).fFirstElementIndex;
if (firstElementInGroup > index) {
// Look into the lower half of cluster groups
R__ASSERT(cgMidpoint > 0);
cgRight = cgMidpoint - 1;
continue;
auto columnRange = cd.second.GetColumnRange(physicalColumnId);
if (columnRange.Contains(index))
return cd.second.GetId();
}

const auto &lastColumnRange = GetClusterDescriptor(clusterIds.back()).GetColumnRange(physicalColumnId);
if ((lastColumnRange.fFirstElementIndex + lastColumnRange.fNElements) <= index) {
// Look into the upper half of cluster groups
cgLeft = cgMidpoint + 1;
continue;
}

// Binary search in the current cluster group; since we already checked the element range boundaries,
// the element must be in that cluster group.
std::size_t clusterLeft = 0;
std::size_t clusterRight = clusterIds.size() - 1;
while (clusterLeft <= clusterRight) {
const std::size_t clusterMidpoint = (clusterLeft + clusterRight) / 2;
const auto clusterId = clusterIds[clusterMidpoint];
const auto &columnRange = GetClusterDescriptor(clusterId).GetColumnRange(physicalColumnId);

if (columnRange.Contains(index))
return clusterId;

if (columnRange.fFirstElementIndex > index) {
R__ASSERT(clusterMidpoint > 0);
clusterRight = clusterMidpoint - 1;
continue;
}

if (columnRange.fFirstElementIndex + columnRange.fNElements <= index) {
clusterLeft = clusterMidpoint + 1;
continue;
}
}
R__ASSERT(false);
}
return kInvalidDescriptorId;
}

/// Locates the cluster that contains the given (global) entry index.
/// Returns kInvalidDescriptorId if no loaded cluster covers entryIdx (e.g. the index is
/// past the last entry or the corresponding page list has not been loaded).
/// Relies on fSortedClusterGroupIds being ordered by entry range and on the cluster IDs
/// inside each group being sorted by first entry number.
ROOT::Experimental::DescriptorId_t ROOT::Experimental::RNTupleDescriptor::FindClusterId(NTupleSize_t entryIdx) const
{
   if (GetNClusterGroups() == 0)
      return kInvalidDescriptorId;

   // Binary search in the cluster group list, followed by a binary search in the clusters of that cluster group

   std::size_t cgLeft = 0;
   std::size_t cgRight = GetNClusterGroups() - 1;
   while (cgLeft <= cgRight) {
      const std::size_t cgMidpoint = (cgLeft + cgRight) / 2;
      const auto &cgDesc = GetClusterGroupDescriptor(fSortedClusterGroupIds[cgMidpoint]);

      if (cgDesc.GetMinEntry() > entryIdx) {
         // The assertion guards the std::size_t underflow of `cgMidpoint - 1`: the first group
         // starts at entry 0, so a search that lands left of group 0 indicates corrupted metadata.
         R__ASSERT(cgMidpoint > 0);
         cgRight = cgMidpoint - 1;
         continue;
      }

      if (cgDesc.GetMinEntry() + cgDesc.GetEntrySpan() <= entryIdx) {
         cgLeft = cgMidpoint + 1;
         continue;
      }

      // Binary search in the current cluster group; since we already checked that the group's
      // entry range contains entryIdx, the matching cluster must be in that cluster group.
      const auto &clusterIds = cgDesc.GetClusterIds();
      R__ASSERT(!clusterIds.empty());
      std::size_t clusterLeft = 0;
      std::size_t clusterRight = clusterIds.size() - 1;
      while (clusterLeft <= clusterRight) {
         const std::size_t clusterMidpoint = (clusterLeft + clusterRight) / 2;
         const auto &clusterDesc = GetClusterDescriptor(clusterIds[clusterMidpoint]);

         if (clusterDesc.GetFirstEntryIndex() > entryIdx) {
            // Same underflow guard as above; the group's first cluster starts at the group's min entry.
            R__ASSERT(clusterMidpoint > 0);
            clusterRight = clusterMidpoint - 1;
            continue;
         }

         if (clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries() <= entryIdx) {
            clusterLeft = clusterMidpoint + 1;
            continue;
         }

         return clusterIds[clusterMidpoint];
      }
      // Unreachable: the outer loop established that this group contains entryIdx, and the
      // clusters of a group tile its entry range without gaps.
      R__ASSERT(false);
   }
   return kInvalidDescriptorId;
}

// TODO(jblomer): fix for cases of sharded clusters
ROOT::Experimental::DescriptorId_t
ROOT::Experimental::RNTupleDescriptor::FindNextClusterId(DescriptorId_t clusterId) const
{
const auto &clusterDesc = GetClusterDescriptor(clusterId);
auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
// TODO(jblomer): binary search?
for (const auto &cd : fClusterDescriptors) {
if (cd.second.GetFirstEntryIndex() == firstEntryInNextCluster)
return cd.second.GetId();
}
return kInvalidDescriptorId;
const auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
return FindClusterId(firstEntryInNextCluster);
Comment on lines +498 to +499
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth shortcutting the common case here and check if clusterId + 1 contains firstEntryInNextCluster?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good question. The "problem" is that I think currently that shortcut will always trigger. However, I also don't want to rely on descriptor ID ordering... I need to think about it.

}

// TODO(jblomer): fix for cases of sharded clusters
ROOT::Experimental::DescriptorId_t
ROOT::Experimental::RNTupleDescriptor::FindPrevClusterId(DescriptorId_t clusterId) const
{
const auto &clusterDesc = GetClusterDescriptor(clusterId);
// TODO(jblomer): binary search?
for (const auto &cd : fClusterDescriptors) {
if (cd.second.GetFirstEntryIndex() + cd.second.GetNEntries() == clusterDesc.GetFirstEntryIndex())
return cd.second.GetId();
}
return kInvalidDescriptorId;
if (clusterDesc.GetFirstEntryIndex() == 0)
return kInvalidDescriptorId;
return FindClusterId(clusterDesc.GetFirstEntryIndex() - 1);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same question here

}

std::vector<ROOT::Experimental::DescriptorId_t>
Expand Down Expand Up @@ -489,8 +587,13 @@ ROOT::Experimental::RNTupleDescriptor::AddClusterGroupDetails(DescriptorId_t clu
return R__FAIL("invalid attempt to re-populate existing cluster");
}
}
std::sort(clusterIds.begin(), clusterIds.end(),
[this](DescriptorId_t a, DescriptorId_t b)
{
return fClusterDescriptors[a].GetFirstEntryIndex() < fClusterDescriptors[b].GetFirstEntryIndex();
});
auto cgBuilder = Internal::RClusterGroupDescriptorBuilder::FromSummary(iter->second);
cgBuilder.AddClusters(clusterIds);
cgBuilder.AddSortedClusters(clusterIds);
iter->second = cgBuilder.MoveDescriptor().Unwrap();
return RResult<void>::Success();
}
Expand Down Expand Up @@ -554,6 +657,7 @@ std::unique_ptr<ROOT::Experimental::RNTupleDescriptor> ROOT::Experimental::RNTup
clone->fColumnDescriptors.emplace(d.first, d.second.Clone());
for (const auto &d : fClusterGroupDescriptors)
clone->fClusterGroupDescriptors.emplace(d.first, d.second.Clone());
clone->fSortedClusterGroupIds = fSortedClusterGroupIds;
for (const auto &d : fClusterDescriptors)
clone->fClusterDescriptors.emplace(d.first, d.second.Clone());
for (const auto &d : fExtraTypeInfoDescriptors)
Expand Down Expand Up @@ -731,10 +835,17 @@ ROOT::Experimental::Internal::RClusterDescriptorBuilder::MoveDescriptor()
return R__FAIL("unset cluster ID");
if (fCluster.fNEntries == 0)
return R__FAIL("empty cluster");
for (const auto &pr : fCluster.fPageRanges) {
for (auto &pr : fCluster.fPageRanges) {
if (fCluster.fColumnRanges.count(pr.first) == 0) {
return R__FAIL("missing column range");
}
pr.second.fCumulativeNElements.clear();
pr.second.fCumulativeNElements.reserve(pr.second.fPageInfos.size());
NTupleSize_t sum = 0;
for (const auto &pi : pr.second.fPageInfos) {
sum += pi.fNElements;
pr.second.fCumulativeNElements.emplace_back(sum);
}
}
RClusterDescriptor result;
std::swap(result, fCluster);
Expand Down Expand Up @@ -827,6 +938,15 @@ ROOT::Experimental::RResult<void> ROOT::Experimental::Internal::RNTupleDescripto
ROOT::Experimental::RNTupleDescriptor ROOT::Experimental::Internal::RNTupleDescriptorBuilder::MoveDescriptor()
{
EnsureValidDescriptor().ThrowOnError();
fDescriptor.fSortedClusterGroupIds.reserve(fDescriptor.fClusterGroupDescriptors.size());
for (const auto &[id, _] : fDescriptor.fClusterGroupDescriptors)
fDescriptor.fSortedClusterGroupIds.emplace_back(id);
std::sort(fDescriptor.fSortedClusterGroupIds.begin(), fDescriptor.fSortedClusterGroupIds.end(),
[this](DescriptorId_t a, DescriptorId_t b)
{
return fDescriptor.fClusterGroupDescriptors[a].GetMinEntry() <
fDescriptor.fClusterGroupDescriptors[b].GetMinEntry();
});
RNTupleDescriptor result;
std::swap(result, fDescriptor);
return result;
Expand Down
2 changes: 1 addition & 1 deletion tree/ntuple/v7/src/RPageStorage.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1160,7 +1160,7 @@ void ROOT::Experimental::Internal::RPagePersistentSink::CommitClusterGroup()
for (auto i = fNextClusterInGroup; i < nClusters; ++i) {
clusterIds.emplace_back(i);
}
cgBuilder.AddClusters(clusterIds);
cgBuilder.AddSortedClusters(clusterIds);
fDescriptorBuilder.AddClusterGroup(cgBuilder.MoveDescriptor().Unwrap());
fSerializationContext.MapClusterGroupId(clusterGroupId);

Expand Down
10 changes: 4 additions & 6 deletions tree/ntuple/v7/test/ntuple_cluster.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,10 @@ class RPageSourceMock : public RPageSource {
.MoveDescriptor()
.Unwrap());
}
descBuilder.AddClusterGroup(ROOT::Experimental::Internal::RClusterGroupDescriptorBuilder()
.ClusterGroupId(0)
.MinEntry(0)
.EntrySpan(6)
.MoveDescriptor()
.Unwrap());
ROOT::Experimental::Internal::RClusterGroupDescriptorBuilder cgBuilder;
cgBuilder.ClusterGroupId(0).MinEntry(0).EntrySpan(6).NClusters(6);
cgBuilder.AddSortedClusters({0, 1, 2, 3, 4, 5});
descBuilder.AddClusterGroup(cgBuilder.MoveDescriptor().Unwrap());
auto descriptorGuard = GetExclDescriptorGuard();
descriptorGuard.MoveIn(descBuilder.MoveDescriptor());
}
Expand Down
Loading
Loading