diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 28fb748a365453..99a5dfc8c242a4 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -303,8 +303,12 @@ Status VariantColumnReader::init(const ColumnReaderOptions& opts, const SegmentF } auto relative_path = path.copy_pop_front(); auto get_data_type_fn = [&]() { + // root subcolumn is ColumnObject::MostCommonType which is jsonb if (relative_path.empty()) { - return make_nullable(std::make_unique()); + return self_column_pb.is_nullable() + ? make_nullable(std::make_unique< + vectorized::ColumnObject::MostCommonType>()) + : std::make_unique(); } return vectorized::DataTypeFactory::instance().create_data_type(column_pb); }; diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp index de0123a330a904..38ac20807da6ae 100644 --- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp +++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp @@ -257,8 +257,7 @@ Status HierarchicalDataReader::_init_container(vectorized::MutableColumnPtr& con // auto column = root_var.get_root(); // auto type = root_var.get_root_type(); MutableColumnPtr column = _root_reader->column->get_ptr(); - container_variant.add_sub_column({}, std::move(column), - ColumnObject::get_most_common_type()); + container_variant.add_sub_column({}, std::move(column), _root_reader->type); } // parent path -> subcolumns std::map nested_subcolumns; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 9b505e4a4a5d61..1e6508de0e21b7 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -233,8 +233,11 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o if (col.is_extracted_column()) { auto relative_path = col.path_info_ptr()->copy_pop_front(); int32_t unique_id = col.unique_id() > 0 ? col.unique_id() : col.parent_unique_id(); - const auto* node = ((VariantColumnReader*)(_column_readers.at(unique_id).get())) - ->get_reader_by_path(relative_path); + const auto* node = + _column_readers.contains(unique_id) + ? ((VariantColumnReader*)(_column_readers.at(unique_id).get())) + ->get_reader_by_path(relative_path) + : nullptr; reader = node != nullptr ? node->data.reader.get() : nullptr; } else { reader = _column_readers.contains(col.unique_id()) @@ -828,7 +831,7 @@ ColumnReader* Segment::_get_column_reader(const TabletColumn& col) { if (col.has_path_info() || col.is_variant_type()) { auto relative_path = col.path_info_ptr()->copy_pop_front(); int32_t unique_id = col.unique_id() > 0 ? col.unique_id() : col.parent_unique_id(); - const auto* node = col.has_path_info() + const auto* node = col.has_path_info() && _column_readers.contains(unique_id) ? ((VariantColumnReader*)(_column_readers.at(unique_id).get())) ->get_reader_by_path(relative_path) : nullptr; diff --git a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp index 33499a8e7e2acc..2fe9f642100fff 100644 --- a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp +++ b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp @@ -151,9 +151,11 @@ Status VariantColumnWriterImpl::_process_root_column(vectorized::ColumnObject* p return status; } const uint8_t* nullmap = - vectorized::check_and_get_column(_null_column.get()) - ->get_data() - .data(); + _null_column + ? vectorized::check_and_get_column(_null_column.get()) + ->get_data() + .data() + : nullptr; RETURN_IF_ERROR(_root_writer->append(nullmap, column->get_data(), num_rows)); ++column_id; converter->clear_source_content(); diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 595326839c51a6..21f681f17a5c40 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -641,8 +641,8 @@ MutableColumnPtr ColumnObject::apply_for_columns(Func&& func) const { } auto sparse_column = func(serialized_sparse_column); res->serialized_sparse_column = sparse_column->assume_mutable(); - res->set_num_rows(serialized_sparse_column->size()); - check_consistency(); + res->num_rows = res->serialized_sparse_column->size(); + res->check_consistency(); return res; } @@ -815,11 +815,6 @@ ColumnObject::ColumnObject(bool is_nullable_, bool create_root_) ENABLE_CHECK_CONSISTENCY(this); } -ColumnObject::ColumnObject(MutableColumnPtr&& sparse_column) - : is_nullable(true), - num_rows(sparse_column->size()), - serialized_sparse_column(std::move(sparse_column)) {} - ColumnObject::ColumnObject(bool is_nullable_, DataTypePtr type, MutableColumnPtr&& column) : is_nullable(is_nullable_), num_rows(0) { add_sub_column({}, std::move(column), type); @@ -994,7 +989,8 @@ bool ColumnObject::Subcolumn::is_null_at(size_t n) const { ind -= num_of_defaults_in_prefix; for (const auto& part : data) { if (ind < part->size()) { - return assert_cast(*part).is_null_at(ind); + const auto* nullable = check_and_get_column(part.get()); + return nullable ? nullable->is_null_at(ind) : false; } ind -= part->size(); } @@ -1061,14 +1057,16 @@ void ColumnObject::Subcolumn::serialize_to_sparse_column(ColumnString* key, std: const auto& part = data[i]; if (row < part->size()) { // no need null in sparse column - if (!assert_cast(*part).is_null_at(row)) { + if (!assert_cast(*part).is_null_at( + row)) { // insert key key->insert_data(path.data(), path.size()); // every subcolumn is always Nullable auto nullable_serde = std::static_pointer_cast( data_types[i]->get_serde(CURRENT_SERIALIZE_NESTING_LEVEL)); - auto& nullable_col = assert_cast(*part); + auto& nullable_col = + assert_cast(*part); // insert value ColumnString::Chars& chars = value->get_chars(); @@ -1707,9 +1705,16 @@ bool ColumnObject::is_visible_root_value(size_t nrow) const { if (subcolumns.get_root()->data.is_null_at(nrow)) { return false; } - nrow = nrow - subcolumns.get_root()->data.num_of_defaults_in_prefix; - const auto& nullable = assert_cast(*subcolumns.get_root()->data.data[0]); - return !nullable.get_data_at(nrow).empty(); + int ind = nrow - subcolumns.get_root()->data.num_of_defaults_in_prefix; + for (const auto& part : subcolumns.get_root()->data.data) { + if (ind < part->size()) { + return !part->get_data_at(ind).empty(); + } + ind -= part->size(); + } + + throw doris::Exception(ErrorCode::OUT_OF_BOUND, "Index ({}) for getting field is out of range", + nrow); } Status ColumnObject::serialize_one_row_to_json_format(int64_t row_num, BufferWritable& output, diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index d1b19dfc6c22f1..fa207d19c39d86 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -274,8 +274,6 @@ class ColumnObject final : public COWHelper { explicit ColumnObject(bool is_nullable_, bool create_root = true); - explicit ColumnObject(MutableColumnPtr&& sparse_column); - explicit ColumnObject(bool is_nullable_, DataTypePtr type, MutableColumnPtr&& column); // create without root, num_rows = size