Skip to content

Commit

Permalink
[fix](new-scan)Fix new scanner load job bugs (apache#12903)
Browse files Browse the repository at this point in the history
Fix bugs:
1. FE needs to send the file format (e.g. parquet, orc, ...) to BE while processing load jobs using the new scanner.
2. Try to get the Parquet file column type from SchemaElement.type first, before falling back to the logical type and then the converted type.
  • Loading branch information
Jibing-Li authored and FreeOnePlus committed Oct 8, 2022
1 parent 7f24ac1 commit c834721
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 2 deletions.
30 changes: 28 additions & 2 deletions be/src/vec/exec/format/parquet/schema_desc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,11 +152,37 @@ void FieldDescriptor::parse_physical_field(const tparquet::SchemaElement& physic
physical_field->physical_type = physical_schema.type;
_physical_fields.push_back(physical_field);
physical_field->physical_column_index = _physical_fields.size() - 1;
physical_field->type = get_doris_type(physical_schema);
}

// Derives the Doris TypeDescriptor for a Parquet schema element.
//
// Resolution order:
//   1. The physical type (physical_schema.type): BOOLEAN, INT32, INT64, INT96,
//      FLOAT and DOUBLE are mapped directly and returned immediately, so the
//      logical/converted type annotations are not consulted for them.
//   2. Otherwise physical_schema.logicalType, if it is set.
//   3. Otherwise physical_schema.converted_type, if it is set.
// If none of the above applies, the default-constructed TypeDescriptor is
// returned unchanged.
//
// @param physical_schema  Parquet schema element describing the column.
// @return                 The TypeDescriptor chosen by the rules above.
TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement& physical_schema) {
TypeDescriptor type;
// Step 1: direct mapping from the Parquet physical type.
switch (physical_schema.type) {
case tparquet::Type::BOOLEAN:
type.type = TYPE_BOOLEAN;
return type;
case tparquet::Type::INT32:
type.type = TYPE_INT;
return type;
// INT64 and INT96 share the same mapping.
// NOTE(review): INT96 is often used for legacy timestamps in Parquet files;
// mapping it to TYPE_BIGINT may be lossy -- confirm this is intended.
case tparquet::Type::INT64:
case tparquet::Type::INT96:
type.type = TYPE_BIGINT;
return type;
case tparquet::Type::FLOAT:
type.type = TYPE_FLOAT;
return type;
case tparquet::Type::DOUBLE:
type.type = TYPE_DOUBLE;
return type;
default:
// Any other physical type falls through to the annotation-based lookup below.
break;
}
// Steps 2 and 3: prefer the logical type, fall back to the converted type.
// NOTE(review): `physical_field` is neither a parameter nor a local of this
// function; the two `physical_field->type = ...` assignments below look like
// the removed side of the diff hunk (the old code in parse_physical_field),
// shown here because the +/- markers were lost -- confirm against the repository.
if (physical_schema.__isset.logicalType) {
physical_field->type = convert_to_doris_type(physical_schema.logicalType);
type = convert_to_doris_type(physical_schema.logicalType);
} else if (physical_schema.__isset.converted_type) {
physical_field->type = convert_to_doris_type(physical_schema.converted_type);
type = convert_to_doris_type(physical_schema.converted_type);
}
return type;
}

TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logicalType) {
Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/exec/format/parquet/schema_desc.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ class FieldDescriptor {

TypeDescriptor convert_to_doris_type(tparquet::ConvertedType::type convertedType);

TypeDescriptor get_doris_type(const tparquet::SchemaElement& physical_schema);

public:
FieldDescriptor() = default;
~FieldDescriptor() = default;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ public ParamCreateContext createContext(Analyzer analyzer) throws UserException
ctx.timezone = analyzer.getTimezone();

TFileScanRangeParams params = new TFileScanRangeParams();
params.format_type = formatType(fileGroupInfo.getFileGroup().getFileFormat(), "");
params.setStrictMode(fileGroupInfo.isStrictMode());
params.setProperties(fileGroupInfo.getBrokerDesc().getProperties());
if (fileGroupInfo.getBrokerDesc().getFileType() == TFileType.FILE_HDFS) {
Expand Down

0 comments on commit c834721

Please sign in to comment.