Skip to content

Commit

Permalink
[Fix](exec) Fix date dict dead loop. (apache#25570)
Browse files Browse the repository at this point in the history
  • Loading branch information
kaka11chen authored Oct 23, 2023
1 parent 9006e2b commit 08832d9
Show file tree
Hide file tree
Showing 8 changed files with 201 additions and 21 deletions.
2 changes: 1 addition & 1 deletion be/src/util/time_lut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ uint8_t calc_weekday(uint64_t day_nr, bool is_sunday_first_day) {
}

uint32_t calc_daynr(uint16_t year, uint8_t month, uint8_t day) {
// date_day_offet_dict range from [1900-01-01, 2039-10-24]
// date_day_offset_dict covers the range [1900-01-01, 2039-12-31]
if (date_day_offset_dict::can_speed_up_calc_daynr(year) &&
LIKELY(date_day_offset_dict::get_dict_init())) {
return date_day_offset_dict::get().daynr(year, month, day);
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/exec/format/orc/vorc_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ OrcReader::OrcReader(RuntimeProfile* profile, RuntimeState* state,
TimezoneUtils::find_cctz_time_zone(ctz, _time_zone);
VecDateTimeValue t;
t.from_unixtime(0, ctz);
_offset_days = t.day() == 31 ? 0 : 1;
_offset_days = t.day() == 31 ? -1 : 0; // If 1969-12-31, then returns -1.
_init_profile();
_init_system_properties();
_init_file_description();
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/exec/format/orc/vorc_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ class OrcReader : public GenericReader {
int64_t _range_size;
const std::string& _ctz;
const std::vector<std::string>* _column_names;
size_t _offset_days = 0;
int32_t _offset_days = 0;
cctz::time_zone _time_zone;

std::list<std::string> _read_cols;
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/exec/format/parquet/decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ void Decoder::init(FieldSchema* field_schema, cctz::time_zone* ctz) {
if (_decode_params->ctz) {
VecDateTimeValue t;
t.from_unixtime(0, *_decode_params->ctz);
_decode_params->offset_days = t.day() == 31 ? 0 : 1;
_decode_params->offset_days = t.day() == 31 ? -1 : 0; // If 1969-12-31, then returns -1.
}
}
} // namespace doris::vectorized
2 changes: 1 addition & 1 deletion be/src/vec/exec/format/parquet/decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ struct DecodeParams {
static const cctz::time_zone utc0;
// schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set the time zone
cctz::time_zone* ctz = nullptr;
size_t offset_days = 0;
int32_t offset_days = 0;
int64_t second_mask = 1;
int64_t scale_to_nano_factor = 1;
DecimalScaleParams decimal_scale;
Expand Down
26 changes: 17 additions & 9 deletions be/src/vec/runtime/vdatetime_value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2667,10 +2667,10 @@ template <typename T>
typename DateV2Value<T>::underlying_value DateV2Value<T>::to_date_int_val() const {
return int_val_;
}

// [1900-01-01, 2039-12-31]
static std::array<DateV2Value<DateV2ValueType>, date_day_offset_dict::DICT_DAYS>
DATE_DAY_OFFSET_ITEMS;

// [1900-01-01, 2039-12-31]
static std::array<std::array<std::array<int, 31>, 12>, 140> DATE_DAY_OFFSET_DICT;

static bool DATE_DAY_OFFSET_ITEMS_INIT = false;
Expand All @@ -2687,19 +2687,27 @@ bool date_day_offset_dict::get_dict_init() {

date_day_offset_dict::date_day_offset_dict() {
DateV2Value<DateV2ValueType> d;
// Init days before epoch.
d.set_time(1969, 12, 31, 0, 0, 0, 0);
for (int i = 0; i < DAY_AFTER_EPOCH; ++i) {
DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH + i] = d;
for (int i = 0; i < DAY_BEFORE_EPOCH; ++i) {
DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH - i - 1] = d;
DATE_DAY_OFFSET_DICT[d.year() - START_YEAR][d.month() - 1][d.day() - 1] =
calc_daynr(d.year(), d.month(), d.day());
d += 1;
d -= 1;
}
d.set_time(1969, 12, 31, 0, 0, 0, 0);
for (int i = 0; i <= DAY_BEFORE_EPOCH; ++i) {
DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH - i] = d;
// Init epoch day.
d.set_time(1970, 1, 1, 0, 0, 0, 0);
DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH] = d;
DATE_DAY_OFFSET_DICT[d.year() - START_YEAR][d.month() - 1][d.day() - 1] =
calc_daynr(d.year(), d.month(), d.day());
d += 1;

// Init days after epoch.
for (int i = 0; i < DAY_AFTER_EPOCH; ++i) {
DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH + 1 + i] = d;
DATE_DAY_OFFSET_DICT[d.year() - START_YEAR][d.month() - 1][d.day() - 1] =
calc_daynr(d.year(), d.month(), d.day());
d -= 1;
d += 1;
}

DATE_DAY_OFFSET_ITEMS_INIT = true;
Expand Down
18 changes: 11 additions & 7 deletions be/src/vec/runtime/vdatetime_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -1516,6 +1516,9 @@ int64_t datetime_diff(const VecDateTimeValue& ts_value1, const DateV2Value<T>& t
return 0;
}

/**
* Date dictionary table. The covered date range is [1900-01-01, 2039-12-31].
*/
class date_day_offset_dict {
private:
static date_day_offset_dict instance;
Expand All @@ -1526,15 +1529,16 @@ class date_day_offset_dict {
date_day_offset_dict& operator=(const date_day_offset_dict&) = default;

public:
static constexpr int DAY_BEFORE_EPOCH = 25566; // 1900-01-01
static constexpr int DAY_AFTER_EPOCH = 25500; // 2039-10-24
static constexpr int DICT_DAYS = DAY_BEFORE_EPOCH + DAY_AFTER_EPOCH;
static constexpr int DAY_BEFORE_EPOCH = 25567; // 1900-01-01
static constexpr int DAY_AFTER_EPOCH = 25566; // 2039-12-31
static constexpr int DICT_DAYS = DAY_BEFORE_EPOCH + 1 + DAY_AFTER_EPOCH; // 1 means 1970-01-01

static constexpr int START_YEAR = 1900; // 1900-01-01
static constexpr int END_YEAR = 2039; // 2039-10-24
static constexpr int DAY_OFFSET_CAL_START_POINT_DAYNR = 719527; // 1969-12-31
static constexpr int START_YEAR = 1900; // 1900-01-01
static constexpr int END_YEAR = 2039; // 2039-12-31
static constexpr int DAY_OFFSET_CAL_START_POINT_DAYNR =
719528; // 1970-01-01 (start from 0000-01-01, 0000-01-01 is day 1, returns 1)

static bool can_speed_up_calc_daynr(int year) { return year >= START_YEAR && year < END_YEAR; }
static bool can_speed_up_calc_daynr(int year) { return year >= START_YEAR && year <= END_YEAR; }

static int get_offset_by_daynr(int daynr) { return daynr - DAY_OFFSET_CAL_START_POINT_DAYNR; }

Expand Down
168 changes: 168 additions & 0 deletions be/test/vec/runtime/vdatetime_value_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -570,4 +570,172 @@ TEST(VDateTimeValueTest, date_v2_to_string_test) {
}
}

TEST(VDateTimeValueTest, date_v2_daynr_test) {
    using DateV2 = DateV2Value<DateV2ValueType>;

    // Checks the calendar fields of `value` and that every time-of-day field reads zero.
    const auto expect_fields = [](DateV2& value, int year, int month, int day) {
        EXPECT_TRUE(value.year() == year);
        EXPECT_TRUE(value.month() == month);
        EXPECT_TRUE(value.day() == day);
        EXPECT_TRUE(value.hour() == 0);
        EXPECT_TRUE(value.minute() == 0);
        EXPECT_TRUE(value.second() == 0);
        EXPECT_TRUE(value.microsecond() == 0);
    };

    // Checks calc_daynr() for the given date, that the dict reports itself initialized, and
    // whether the year / day number are reported as eligible for the dict fast path.
    const auto expect_daynr_and_dict = [](int year, int month, int day, int daynr, bool in_dict) {
        EXPECT_TRUE(doris::calc_daynr(year, month, day) == static_cast<uint32_t>(daynr));
        EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
        if (in_dict) {
            EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_calc_daynr(year));
            EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_daynr_to_date(daynr));
        } else {
            EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_calc_daynr(year));
            EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_daynr_to_date(daynr));
        }
    };

    // Full check for a day number that is expected to convert successfully:
    // daynr -> date fields, date -> daynr, and dict eligibility.
    const auto check_convertible = [&](int daynr, int year, int month, int day, bool in_dict) {
        DateV2 date_v2;
        EXPECT_TRUE(date_v2.get_date_from_daynr(daynr));
        expect_fields(date_v2, year, month, day);
        expect_daynr_and_dict(year, month, day, daynr, in_dict);
    };

    check_convertible(719528, 1970, 1, 1, true);    // 1970/01/01
    check_convertible(719527, 1969, 12, 31, true);  // 1969/12/31
    check_convertible(693961, 1900, 1, 1, true);    // 1900/01/01, first day expected in the dict
    check_convertible(693960, 1899, 12, 31, false); // 1899/12/31, one day before the dict range
    check_convertible(745094, 2039, 12, 31, true);  // 2039/12/31, last day expected in the dict
    check_convertible(745095, 2040, 1, 1, false);   // 2040/01/01, one day after the dict range
    check_convertible(1, 0, 1, 1, false);           // 0000/01/01

    {
        // Invalid date 0000/00/01: no conversion is attempted here; a default-constructed
        // value is expected to read all zero, and calc_daynr() is expected to return 0.
        DateV2 date_v2;
        expect_fields(date_v2, 0, 0, 0);
        EXPECT_TRUE(doris::calc_daynr(0, 0, 1) == 0);
    }

    check_convertible(3652424, 9999, 12, 31, false); // 9999/12/31

    {
        // Invalid date 10000/01/01: the conversion is expected to fail and leave the
        // value reading all zero.
        DateV2 date_v2;
        EXPECT_FALSE(date_v2.get_date_from_daynr(3652425));
        expect_fields(date_v2, 0, 0, 0);
        expect_daynr_and_dict(10000, 1, 1, 3652425, false);
    }
}

} // namespace doris::vectorized

0 comments on commit 08832d9

Please sign in to comment.