Skip to content

Commit

Permalink
[Fix](parquet-reader) some fix for #5 (Fix and optimize parquet min-max filtering).
Browse files Browse the repository at this point in the history
  • Loading branch information
kaka11chen committed Jul 24, 2024
1 parent 4eb4770 commit b75f5b8
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
10 changes: 5 additions & 5 deletions be/src/vec/exec/format/parquet/parquet_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -437,16 +437,16 @@ class CorruptStatistics {
private:
// Emits a WARNING-level log line saying that statistics are being ignored
// because the created_by string could not be parsed (see PARQUET-251).
//
// createdBy: the created_by string that failed to parse; echoed in the log line.
// msg:       extra detail appended to the log line after ", msg: ".
//
// The "log only once" guard below is commented out, so despite the function
// name the warning is emitted on every call.
static void warn_parse_error_once(const std::string& createdBy, const std::string_view& msg) {
//if (!already_logged.exchange(true)) {
// NOTE(review): the LOG statement appears twice in this view. This looks like
// the removed/added pair of a whitespace-only diff hunk rather than two real
// statements -- confirm against the actual header before treating it as a
// double log.
LOG(WARNING) << "Ignoring statistics because created_by could not be parsed (see "
"PARQUET-251)."
" CreatedBy: "
<< createdBy << ", msg: " << msg;
LOG(WARNING) << "Ignoring statistics because created_by could not be parsed (see "
"PARQUET-251)."
" CreatedBy: "
<< createdBy << ", msg: " << msg;
//}
}

// Emits msg as a WARNING-level log line.
//
// The "log only once" guard below is commented out, so despite the function
// name the warning is emitted on every call.
static void warn_once(const std::string_view& msg) {
//if (!already_logged.exchange(true)) {
// NOTE(review): the LOG statement appears twice in this view. This looks like
// the removed/added pair of a whitespace-only diff hunk rather than two real
// statements -- confirm against the actual header.
LOG(WARNING) << msg;
LOG(WARNING) << msg;
//}
}

Expand Down
12 changes: 7 additions & 5 deletions be/src/vec/exec/format/parquet/parquet_pred_cmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ class ParquetPredicate {

CppType min_value;
CppType max_value;
std::unique_ptr<std::string> encoded_min_copy;
std::unique_ptr<std::string> encoded_max_copy;
tparquet::Type::type physical_type = col_schema->physical_type;
switch (col_val_range.type()) {
#define DISPATCH(REINTERPRET_TYPE, PARQUET_TYPE) \
Expand Down Expand Up @@ -186,13 +188,13 @@ class ParquetPredicate {
case TYPE_STRING:
if constexpr (std::is_same_v<CppType, StringRef>) {
if (!use_min_max_value) {
std::string min_copy(encoded_min);
std::string max_copy(encoded_max);
if (!_try_read_old_utf8_stats(min_copy, max_copy)) {
encoded_min_copy = std::make_unique<std::string>(encoded_min);
encoded_max_copy = std::make_unique<std::string>(encoded_max);
if (!_try_read_old_utf8_stats(*encoded_min_copy, *encoded_max_copy)) {
return false;
}
min_value = StringRef(min_copy);
max_value = StringRef(max_copy);
min_value = StringRef(*encoded_min_copy);
max_value = StringRef(*encoded_max_copy);
} else {
min_value = StringRef(encoded_min);
max_value = StringRef(encoded_max);
Expand Down

0 comments on commit b75f5b8

Please sign in to comment.