From 08832d9f3a94acff981140279bba443b69751e14 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Tue, 24 Oct 2023 02:51:43 +0800 Subject: [PATCH] [Fix](exec) Fix date dict dead loop. (#25570) --- be/src/util/time_lut.cpp | 2 +- be/src/vec/exec/format/orc/vorc_reader.cpp | 2 +- be/src/vec/exec/format/orc/vorc_reader.h | 2 +- be/src/vec/exec/format/parquet/decoder.cpp | 2 +- be/src/vec/exec/format/parquet/decoder.h | 2 +- be/src/vec/runtime/vdatetime_value.cpp | 26 ++- be/src/vec/runtime/vdatetime_value.h | 18 +- be/test/vec/runtime/vdatetime_value_test.cpp | 168 +++++++++++++++++++ 8 files changed, 201 insertions(+), 21 deletions(-) diff --git a/be/src/util/time_lut.cpp b/be/src/util/time_lut.cpp index 9be2dec4fc7504..616541d411f636 100644 --- a/be/src/util/time_lut.cpp +++ b/be/src/util/time_lut.cpp @@ -96,7 +96,7 @@ uint8_t calc_weekday(uint64_t day_nr, bool is_sunday_first_day) { } uint32_t calc_daynr(uint16_t year, uint8_t month, uint8_t day) { - // date_day_offet_dict range from [1900-01-01, 2039-10-24] + // date_day_offset_dict range from [1900-01-01, 2039-12-31] if (date_day_offset_dict::can_speed_up_calc_daynr(year) && LIKELY(date_day_offset_dict::get_dict_init())) { return date_day_offset_dict::get().daynr(year, month, day); diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index c1228c9d454e70..212540c0d8e49d 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -149,7 +149,7 @@ OrcReader::OrcReader(RuntimeProfile* profile, RuntimeState* state, TimezoneUtils::find_cctz_time_zone(ctz, _time_zone); VecDateTimeValue t; t.from_unixtime(0, ctz); - _offset_days = t.day() == 31 ? 0 : 1; + _offset_days = t.day() == 31 ? -1 : 0; // If 1969-12-31, then returns -1. 
_init_profile(); _init_system_properties(); _init_file_description(); diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index c41b8718d75c1d..feb1b70062635e 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -500,7 +500,7 @@ class OrcReader : public GenericReader { int64_t _range_size; const std::string& _ctz; const std::vector* _column_names; - size_t _offset_days = 0; + int32_t _offset_days = 0; cctz::time_zone _time_zone; std::list _read_cols; diff --git a/be/src/vec/exec/format/parquet/decoder.cpp b/be/src/vec/exec/format/parquet/decoder.cpp index bf8ef0b233044c..0a158176091511 100644 --- a/be/src/vec/exec/format/parquet/decoder.cpp +++ b/be/src/vec/exec/format/parquet/decoder.cpp @@ -181,7 +181,7 @@ void Decoder::init(FieldSchema* field_schema, cctz::time_zone* ctz) { if (_decode_params->ctz) { VecDateTimeValue t; t.from_unixtime(0, *_decode_params->ctz); - _decode_params->offset_days = t.day() == 31 ? 0 : 1; + _decode_params->offset_days = t.day() == 31 ? -1 : 0; // If 1969-12-31, then returns -1. 
} } } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/parquet/decoder.h b/be/src/vec/exec/format/parquet/decoder.h index 6c1030818cdf9f..acd9965bad8b7a 100644 --- a/be/src/vec/exec/format/parquet/decoder.h +++ b/be/src/vec/exec/format/parquet/decoder.h @@ -71,7 +71,7 @@ struct DecodeParams { static const cctz::time_zone utc0; // schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set the time zone cctz::time_zone* ctz = nullptr; - size_t offset_days = 0; + int32_t offset_days = 0; int64_t second_mask = 1; int64_t scale_to_nano_factor = 1; DecimalScaleParams decimal_scale; diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index 201548b1642624..3e24c1ffe0afbb 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -2667,10 +2667,10 @@ template typename DateV2Value::underlying_value DateV2Value::to_date_int_val() const { return int_val_; } - +// [1900-01-01, 2039-12-31] static std::array, date_day_offset_dict::DICT_DAYS> DATE_DAY_OFFSET_ITEMS; - +// [1900-01-01, 2039-12-31] static std::array, 12>, 140> DATE_DAY_OFFSET_DICT; static bool DATE_DAY_OFFSET_ITEMS_INIT = false; @@ -2687,19 +2687,27 @@ bool date_day_offset_dict::get_dict_init() { date_day_offset_dict::date_day_offset_dict() { DateV2Value d; + // Init days before epoch. d.set_time(1969, 12, 31, 0, 0, 0, 0); - for (int i = 0; i < DAY_AFTER_EPOCH; ++i) { - DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH + i] = d; + for (int i = 0; i < DAY_BEFORE_EPOCH; ++i) { + DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH - i - 1] = d; DATE_DAY_OFFSET_DICT[d.year() - START_YEAR][d.month() - 1][d.day() - 1] = calc_daynr(d.year(), d.month(), d.day()); - d += 1; + d -= 1; } - d.set_time(1969, 12, 31, 0, 0, 0, 0); - for (int i = 0; i <= DAY_BEFORE_EPOCH; ++i) { - DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH - i] = d; + // Init epoch day. 
+ d.set_time(1970, 1, 1, 0, 0, 0, 0); + DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH] = d; + DATE_DAY_OFFSET_DICT[d.year() - START_YEAR][d.month() - 1][d.day() - 1] = + calc_daynr(d.year(), d.month(), d.day()); + d += 1; + + // Init days after epoch. + for (int i = 0; i < DAY_AFTER_EPOCH; ++i) { + DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH + 1 + i] = d; DATE_DAY_OFFSET_DICT[d.year() - START_YEAR][d.month() - 1][d.day() - 1] = calc_daynr(d.year(), d.month(), d.day()); - d -= 1; + d += 1; } DATE_DAY_OFFSET_ITEMS_INIT = true; diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index 97c82f68bbeb5b..b03c09a55d352b 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -1516,6 +1516,9 @@ int64_t datetime_diff(const VecDateTimeValue& ts_value1, const DateV2Value& t return 0; } +/** + * Date dict table. date range is [1900-01-01, 2039-12-31]. + */ class date_day_offset_dict { private: static date_day_offset_dict instance; @@ -1526,15 +1529,16 @@ class date_day_offset_dict { date_day_offset_dict& operator=(const date_day_offset_dict&) = default; public: - static constexpr int DAY_BEFORE_EPOCH = 25566; // 1900-01-01 - static constexpr int DAY_AFTER_EPOCH = 25500; // 2039-10-24 - static constexpr int DICT_DAYS = DAY_BEFORE_EPOCH + DAY_AFTER_EPOCH; + static constexpr int DAY_BEFORE_EPOCH = 25567; // 1900-01-01 + static constexpr int DAY_AFTER_EPOCH = 25566; // 2039-12-31 + static constexpr int DICT_DAYS = DAY_BEFORE_EPOCH + 1 + DAY_AFTER_EPOCH; // 1 means 1970-01-01 - static constexpr int START_YEAR = 1900; // 1900-01-01 - static constexpr int END_YEAR = 2039; // 2039-10-24 - static constexpr int DAY_OFFSET_CAL_START_POINT_DAYNR = 719527; // 1969-12-31 + static constexpr int START_YEAR = 1900; // 1900-01-01 + static constexpr int END_YEAR = 2039; // 2039-12-31 + static constexpr int DAY_OFFSET_CAL_START_POINT_DAYNR = + 719528; // 1970-01-01 (start from 0000-01-01, 0000-01-01 is day 1, returns 1) - static bool 
can_speed_up_calc_daynr(int year) { return year >= START_YEAR && year < END_YEAR; } + static bool can_speed_up_calc_daynr(int year) { return year >= START_YEAR && year <= END_YEAR; } static int get_offset_by_daynr(int daynr) { return daynr - DAY_OFFSET_CAL_START_POINT_DAYNR; } diff --git a/be/test/vec/runtime/vdatetime_value_test.cpp b/be/test/vec/runtime/vdatetime_value_test.cpp index 05943dcc6c3455..bb396b2ce6f333 100644 --- a/be/test/vec/runtime/vdatetime_value_test.cpp +++ b/be/test/vec/runtime/vdatetime_value_test.cpp @@ -570,4 +570,172 @@ TEST(VDateTimeValueTest, date_v2_to_string_test) { } } +TEST(VDateTimeValueTest, date_v2_daynr_test) { + { + DateV2Value date_v2; + // 1970/01/01 + EXPECT_TRUE(date_v2.get_date_from_daynr(719528)); + EXPECT_TRUE(date_v2.year() == 1970); + EXPECT_TRUE(date_v2.month() == 1); + EXPECT_TRUE(date_v2.day() == 1); + EXPECT_TRUE(date_v2.hour() == 0); + EXPECT_TRUE(date_v2.minute() == 0); + EXPECT_TRUE(date_v2.second() == 0); + EXPECT_TRUE(date_v2.microsecond() == 0); + EXPECT_TRUE(doris::calc_daynr(1970, 1, 1) == 719528); + EXPECT_TRUE(date_day_offset_dict::get().get_dict_init()); + EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_calc_daynr(1970)); + EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_daynr_to_date(719528)); + } + + { + DateV2Value date_v2; + // 1969/12/31 + EXPECT_TRUE(date_v2.get_date_from_daynr(719527)); + EXPECT_TRUE(date_v2.year() == 1969); + EXPECT_TRUE(date_v2.month() == 12); + EXPECT_TRUE(date_v2.day() == 31); + EXPECT_TRUE(date_v2.hour() == 0); + EXPECT_TRUE(date_v2.minute() == 0); + EXPECT_TRUE(date_v2.second() == 0); + EXPECT_TRUE(date_v2.microsecond() == 0); + EXPECT_TRUE(doris::calc_daynr(1969, 12, 31) == 719527); + EXPECT_TRUE(date_day_offset_dict::get().get_dict_init()); + EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_calc_daynr(1969)); + EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_daynr_to_date(719527)); + } + + { + DateV2Value date_v2; + // 1900/01/01 + 
EXPECT_TRUE(date_v2.get_date_from_daynr(693961)); + EXPECT_TRUE(date_v2.year() == 1900); + EXPECT_TRUE(date_v2.month() == 1); + EXPECT_TRUE(date_v2.day() == 1); + EXPECT_TRUE(date_v2.hour() == 0); + EXPECT_TRUE(date_v2.minute() == 0); + EXPECT_TRUE(date_v2.second() == 0); + EXPECT_TRUE(date_v2.microsecond() == 0); + EXPECT_TRUE(doris::calc_daynr(1900, 1, 1) == 693961); + EXPECT_TRUE(date_day_offset_dict::get().get_dict_init()); + EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_calc_daynr(1900)); + EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_daynr_to_date(693961)); + } + + { + DateV2Value date_v2; + // 1899/12/31 + EXPECT_TRUE(date_v2.get_date_from_daynr(693960)); + EXPECT_TRUE(date_v2.year() == 1899); + EXPECT_TRUE(date_v2.month() == 12); + EXPECT_TRUE(date_v2.day() == 31); + EXPECT_TRUE(date_v2.hour() == 0); + EXPECT_TRUE(date_v2.minute() == 0); + EXPECT_TRUE(date_v2.second() == 0); + EXPECT_TRUE(date_v2.microsecond() == 0); + EXPECT_TRUE(doris::calc_daynr(1899, 12, 31) == 693960); + EXPECT_TRUE(date_day_offset_dict::get().get_dict_init()); + EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_calc_daynr(1899)); + EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_daynr_to_date(693960)); + } + + { + DateV2Value date_v2; + // 2039/12/31 + EXPECT_TRUE(date_v2.get_date_from_daynr(745094)); + EXPECT_TRUE(date_v2.year() == 2039); + EXPECT_TRUE(date_v2.month() == 12); + EXPECT_TRUE(date_v2.day() == 31); + EXPECT_TRUE(date_v2.hour() == 0); + EXPECT_TRUE(date_v2.minute() == 0); + EXPECT_TRUE(date_v2.second() == 0); + EXPECT_TRUE(date_v2.microsecond() == 0); + EXPECT_TRUE(doris::calc_daynr(2039, 12, 31) == 745094); + EXPECT_TRUE(date_day_offset_dict::get().get_dict_init()); + EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_calc_daynr(2039)); + EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_daynr_to_date(745094)); + } + + { + DateV2Value date_v2; + // 2040/01/01 + EXPECT_TRUE(date_v2.get_date_from_daynr(745095)); + EXPECT_TRUE(date_v2.year() == 
2040); + EXPECT_TRUE(date_v2.month() == 1); + EXPECT_TRUE(date_v2.day() == 1); + EXPECT_TRUE(date_v2.hour() == 0); + EXPECT_TRUE(date_v2.minute() == 0); + EXPECT_TRUE(date_v2.second() == 0); + EXPECT_TRUE(date_v2.microsecond() == 0); + EXPECT_TRUE(doris::calc_daynr(2040, 01, 01) == 745095); + EXPECT_TRUE(date_day_offset_dict::get().get_dict_init()); + EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_calc_daynr(2040)); + EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_daynr_to_date(745095)); + } + + { + DateV2Value date_v2; + // 0000/01/01 + EXPECT_TRUE(date_v2.get_date_from_daynr(1)); + EXPECT_TRUE(date_v2.year() == 0); + EXPECT_TRUE(date_v2.month() == 1); + EXPECT_TRUE(date_v2.day() == 1); + EXPECT_TRUE(date_v2.hour() == 0); + EXPECT_TRUE(date_v2.minute() == 0); + EXPECT_TRUE(date_v2.second() == 0); + EXPECT_TRUE(date_v2.microsecond() == 0); + EXPECT_TRUE(doris::calc_daynr(0, 01, 01) == 1); + EXPECT_TRUE(date_day_offset_dict::get().get_dict_init()); + EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_calc_daynr(0)); + EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_daynr_to_date(1)); + } + + { + DateV2Value date_v2; + // Invalid date 0000/00/01 + EXPECT_TRUE(date_v2.year() == 0); + EXPECT_TRUE(date_v2.month() == 0); + EXPECT_TRUE(date_v2.day() == 0); + EXPECT_TRUE(date_v2.hour() == 0); + EXPECT_TRUE(date_v2.minute() == 0); + EXPECT_TRUE(date_v2.second() == 0); + EXPECT_TRUE(date_v2.microsecond() == 0); + EXPECT_TRUE(doris::calc_daynr(0, 0, 1) == 0); + } + + { + DateV2Value date_v2; + // 9999/12/31 + EXPECT_TRUE(date_v2.get_date_from_daynr(3652424)); + EXPECT_TRUE(date_v2.year() == 9999); + EXPECT_TRUE(date_v2.month() == 12); + EXPECT_TRUE(date_v2.day() == 31); + EXPECT_TRUE(date_v2.hour() == 0); + EXPECT_TRUE(date_v2.minute() == 0); + EXPECT_TRUE(date_v2.second() == 0); + EXPECT_TRUE(date_v2.microsecond() == 0); + EXPECT_TRUE(doris::calc_daynr(9999, 12, 31) == 3652424); + EXPECT_TRUE(date_day_offset_dict::get().get_dict_init()); + 
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_calc_daynr(9999)); + EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_daynr_to_date(3652424)); + } + + { + DateV2Value date_v2; + // Invalid date 10000/01/01 + EXPECT_FALSE(date_v2.get_date_from_daynr(3652425)); + EXPECT_TRUE(date_v2.year() == 0); + EXPECT_TRUE(date_v2.month() == 0); + EXPECT_TRUE(date_v2.day() == 0); + EXPECT_TRUE(date_v2.hour() == 0); + EXPECT_TRUE(date_v2.minute() == 0); + EXPECT_TRUE(date_v2.second() == 0); + EXPECT_TRUE(date_v2.microsecond() == 0); + EXPECT_TRUE(doris::calc_daynr(10000, 01, 01) == 3652425); + EXPECT_TRUE(date_day_offset_dict::get().get_dict_init()); + EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_calc_daynr(10000)); + EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_daynr_to_date(3652425)); + } +} + } // namespace doris::vectorized