diff --git a/be/src/formats/parquet/column_reader.cpp b/be/src/formats/parquet/column_reader.cpp index afb3c6d12c5ea6..ca15802d56e768 100644 --- a/be/src/formats/parquet/column_reader.cpp +++ b/be/src/formats/parquet/column_reader.cpp @@ -371,20 +371,33 @@ class StructColumnReader : public ColumnReader { size_t origin_rows_to_skip = _opts.context->rows_to_skip; // Fill data for non-nullptr subfield column reader + bool first_read = true; + size_t real_read = 0; for (size_t i = 0; i < fields_column.size(); i++) { Column* child_column = fields_column[i].get(); if (_child_readers[i] != nullptr) { _opts.context->next_row = origin_next_row; _opts.context->rows_to_skip = origin_rows_to_skip; RETURN_IF_ERROR(_child_readers[i]->prepare_batch(num_records, content_type, child_column)); + size_t current_real_read = child_column->size(); + real_read = first_read ? current_real_read : real_read; + first_read = false; + if (UNLIKELY(real_read != current_real_read)) { + return Status::InternalError(strings::Substitute("Unmatched row count, $0", field_name)); + } } } + if (UNLIKELY(first_read)) { + return Status::InternalError( + strings::Substitute("All used subfield of struct type $1 is not exist", _field->name)); + } + // Append default value for not selected subfield for (size_t i = 0; i < fields_column.size(); i++) { Column* child_column = fields_column[i].get(); if (_child_readers[i] == nullptr) { - child_column->append_default(*num_records); + child_column->append_default(real_read); } } diff --git a/be/src/formats/parquet/group_reader.cpp b/be/src/formats/parquet/group_reader.cpp index 7e2a4317e36ed4..3222aea89859b0 100644 --- a/be/src/formats/parquet/group_reader.cpp +++ b/be/src/formats/parquet/group_reader.cpp @@ -450,6 +450,7 @@ Status GroupReader::_read(const std::vector& read_columns, size_t* row_coun } size_t count = *row_count; + size_t real_count = count; for (int col_idx : read_columns) { auto& column = _param.read_cols[col_idx]; ColumnContentType content_type = _dict_filter_ctx.column_content_type(col_idx); @@ -461,6 +462,10 @@ Status GroupReader::_read(const std::vector& read_columns, size_t* row_coun if (!status.ok() && !status.is_end_of_file()) { return status; } + real_count = col_idx == read_columns[0] ? count : real_count; + if (UNLIKELY(real_count != count)) { + return Status::InternalError(strings::Substitute("Unmatched row count, $0", _param.file->filename())); + } } if (count != *row_count) {