From fcb2132e73d7f4c30656fb181e5819c1bd5199d0 Mon Sep 17 00:00:00 2001 From: Florine de Geus Date: Fri, 4 Aug 2023 13:30:49 +0200 Subject: [PATCH] [ntuple] Throw when inconsistent compression is encountered --- tree/ntupleutil/v7/inc/ROOT/RNTupleInspector.hxx | 12 +++++++++--- tree/ntupleutil/v7/src/RNTupleInspector.cxx | 9 +++++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/tree/ntupleutil/v7/inc/ROOT/RNTupleInspector.hxx b/tree/ntupleutil/v7/inc/ROOT/RNTupleInspector.hxx index dde0304cff673..5354bd4173580 100644 --- a/tree/ntupleutil/v7/inc/ROOT/RNTupleInspector.hxx +++ b/tree/ntupleutil/v7/inc/ROOT/RNTupleInspector.hxx @@ -115,7 +115,9 @@ private: /// information will be stored in `fColumnInfo`, and the RNTuple-level information /// in `fCompressionSettings`, `fOnDiskSize` and `fInMemorySize`. /// - /// This method is called when the `RNTupleInspector` is initially created. + /// This method is called when the `RNTupleInspector` is initially created. This means that anything unexpected about + /// the RNTuple itself (e.g. inconsistent compression settings across clusters) will be detected here. Therefore, any + /// related exceptions will be thrown on creation of the inspector. void CollectColumnInfo(); /// Recursively gather field-level information and store it in `fFieldTreeInfo`. @@ -133,7 +135,9 @@ public: RNTupleInspector &operator=(RNTupleInspector &&other) = delete; ~RNTupleInspector() = default; - /// Create a new inspector for a given RNTuple. + /// Create a new inspector for a given RNTuple. When this factory method is called, all required static information + /// is collected from the RNTuple's fields and underlying columns at once. This means that when any + /// inconsistencies are encountered (e.g. inconsistent compression across clusters), it will throw an error here. 
static std::unique_ptr Create(std::unique_ptr pageSource); static std::unique_ptr Create(RNTuple *sourceNTuple); static std::unique_ptr Create(std::string_view ntupleName, std::string_view storage); @@ -141,7 +145,9 @@ public: /// Get the descriptor for the RNTuple being inspected. RNTupleDescriptor *GetDescriptor() const { return fDescriptor.get(); } - /// Get the compression settings of the RNTuple being inspected. + /// Get the compression settings of the RNTuple being inspected. Here, we assume that the compression settings are + /// consistent across all clusters and columns. If this is not the case, an exception will be thrown upon + /// `RNTupleInspector::Create`. int GetCompressionSettings() const { return fCompressionSettings; } /// Get the on-disk, compressed size of the RNTuple being inspected, in bytes. diff --git a/tree/ntupleutil/v7/src/RNTupleInspector.cxx b/tree/ntupleutil/v7/src/RNTupleInspector.cxx index 39d78e6f70391..121d316ffa9a7 100644 --- a/tree/ntupleutil/v7/src/RNTupleInspector.cxx +++ b/tree/ntupleutil/v7/src/RNTupleInspector.cxx @@ -61,8 +61,13 @@ void ROOT::Experimental::RNTupleInspector::CollectColumnInfo() if (fCompressionSettings == -1) { fCompressionSettings = columnRange.fCompressionSettings; - } else { - R__ASSERT(columnRange.fCompressionSettings == fCompressionSettings); + } else if (fCompressionSettings != columnRange.fCompressionSettings) { + // Note that currently all clusters and columns are compressed with the same settings and it is not yet + // possible to do otherwise. This means that currently, this exception should never be thrown, but this + // could change in the future. + throw RException(R__FAIL("compression setting mismatch between column ranges (" + + std::to_string(fCompressionSettings) + " vs " + + std::to_string(columnRange.fCompressionSettings) + ")")); } const auto &pageRange = clusterDescriptor.GetPageRange(colId);