Skip to content

Commit

Permalink
Merge pull request #47 from duckdb/mcci
Browse files (browse the repository at this point in the history)
Actual commit
  • Loading branch information
hannes authored Jun 28, 2024
2 parents a18bfab + 68b6be9 commit 6d4d0e7
Show file tree
Hide file tree
Showing 680 changed files with 101,204 additions and 54,222 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/Nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,14 @@ jobs:
- name: Vendor sources
id: vendor
run: |
git pull --rebase
git config --global user.email "[email protected]"
git config --global user.name "DuckDB Labs GitHub Bot"
git rm -rf src/duckdb
python vendor.py --duckdb .git/duckdb
git add src/duckdb
rm -rf .git/duckdb
export REV=`git rev-parse --short HEAD`
git commit -m "Update vendored DuckDB sources to $REV"
git push --dry-run
# Check if ahead of upstream branch
# If yes, set a step output
Expand Down
12 changes: 6 additions & 6 deletions src/duckdb/extension/icu/icu-strptime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,15 @@ struct ICUStrptime : public ICUDateFunc {
}

// Now get the parts in the given time zone
uint64_t micros = 0;
uint64_t micros = parsed.GetMicros();
calendar->set(UCAL_EXTENDED_YEAR, parsed.data[0]); // strptime doesn't understand eras
calendar->set(UCAL_MONTH, parsed.data[1] - 1);
calendar->set(UCAL_DATE, parsed.data[2]);
calendar->set(UCAL_HOUR_OF_DAY, parsed.data[3]);
calendar->set(UCAL_MINUTE, parsed.data[4]);
calendar->set(UCAL_SECOND, parsed.data[5]);
calendar->set(UCAL_MILLISECOND, parsed.data[6] / Interval::MICROS_PER_MSEC);
micros = parsed.data[6] % Interval::MICROS_PER_MSEC;
calendar->set(UCAL_MILLISECOND, micros / Interval::MICROS_PER_MSEC);
micros %= Interval::MICROS_PER_MSEC;

// This overrides the TZ setting, so only use it if an offset was parsed.
// Note that we don't bother/worry about the DST setting because the two just combine.
Expand Down Expand Up @@ -158,7 +158,7 @@ struct ICUStrptime : public ICUDateFunc {
}
}

static bind_scalar_function_t bind_strptime;
static bind_scalar_function_t bind_strptime; // NOLINT

static duckdb::unique_ptr<FunctionData> StrpTimeBindFunction(ClientContext &context, ScalarFunction &bound_function,
vector<duckdb::unique_ptr<Expression>> &arguments) {
Expand Down Expand Up @@ -194,7 +194,7 @@ struct ICUStrptime : public ICUDateFunc {
throw InvalidInputException("strptime format list must not be empty");
}
vector<StrpTimeFormat> formats;
bool has_tz = true;
bool has_tz = false;
for (const auto &child : children) {
format_string = child.ToString();
format.format_specifier = format_string;
Expand Down Expand Up @@ -341,7 +341,7 @@ struct ICUStrptime : public ICUDateFunc {
}
};

bind_scalar_function_t ICUStrptime::bind_strptime = nullptr;
bind_scalar_function_t ICUStrptime::bind_strptime = nullptr; // NOLINT

struct ICUStrftime : public ICUDateFunc {
static void ParseFormatSpecifier(string_t &format_str, StrfTimeFormat &format) {
Expand Down
165 changes: 92 additions & 73 deletions src/duckdb/extension/icu/icu-table-range.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,13 @@ namespace duckdb {
struct ICUTableRange {
using CalendarPtr = unique_ptr<icu::Calendar>;

struct BindData : public TableFunctionData {
BindData(const BindData &other)
struct ICURangeBindData : public TableFunctionData {
ICURangeBindData(const ICURangeBindData &other)
: TableFunctionData(other), tz_setting(other.tz_setting), cal_setting(other.cal_setting),
calendar(other.calendar->clone()), start(other.start), end(other.end), increment(other.increment),
inclusive_bound(other.inclusive_bound), greater_than_check(other.greater_than_check) {
calendar(other.calendar->clone()) {
}

explicit BindData(ClientContext &context) {
explicit ICURangeBindData(ClientContext &context) {
Value tz_value;
if (context.TryGetCurrentSetting("TimeZone", tz_value)) {
tz_setting = tz_value.ToString();
Expand Down Expand Up @@ -48,24 +47,22 @@ struct ICUTableRange {
string tz_setting;
string cal_setting;
CalendarPtr calendar;
};

struct ICURangeLocalState : public LocalTableFunctionState {
ICURangeLocalState() {
}

bool initialized_row = false;
idx_t current_input_row = 0;
timestamp_t current_state;

timestamp_t start;
timestamp_t end;
interval_t increment;
bool inclusive_bound;
bool greater_than_check;

bool Equals(const FunctionData &other_p) const override {
auto &other = other_p.Cast<const BindData>();
return other.start == start && other.end == end && other.increment == increment &&
other.inclusive_bound == inclusive_bound && other.greater_than_check == greater_than_check &&
*calendar == *other.calendar;
}

unique_ptr<FunctionData> Copy() const override {
return make_uniq<BindData>(*this);
}

bool Finished(timestamp_t current_value) const {
if (greater_than_check) {
if (inclusive_bound) {
Expand All @@ -84,107 +81,129 @@ struct ICUTableRange {
};

template <bool GENERATE_SERIES>
static unique_ptr<FunctionData> Bind(ClientContext &context, TableFunctionBindInput &input,
vector<LogicalType> &return_types, vector<string> &names) {
auto result = make_uniq<BindData>(context);

auto &inputs = input.inputs;
D_ASSERT(inputs.size() == 3);
for (const auto &value : inputs) {
if (value.IsNull()) {
throw BinderException("RANGE with NULL bounds is not supported");
static void GenerateRangeDateTimeParameters(DataChunk &input, idx_t row_id, ICURangeLocalState &result) {
input.Flatten();
for (idx_t c = 0; c < input.ColumnCount(); c++) {
if (FlatVector::IsNull(input.data[c], row_id)) {
result.start = timestamp_t(0);
result.end = timestamp_t(0);
result.increment = interval_t();
result.greater_than_check = true;
result.inclusive_bound = false;
return;
}
}
result->start = inputs[0].GetValue<timestamp_t>();
result->end = inputs[1].GetValue<timestamp_t>();
result->increment = inputs[2].GetValue<interval_t>();

result.start = FlatVector::GetValue<timestamp_t>(input.data[0], row_id);
result.end = FlatVector::GetValue<timestamp_t>(input.data[1], row_id);
result.increment = FlatVector::GetValue<interval_t>(input.data[2], row_id);

// Infinities either cause errors or infinite loops, so just ban them
if (!Timestamp::IsFinite(result->start) || !Timestamp::IsFinite(result->end)) {
if (!Timestamp::IsFinite(result.start) || !Timestamp::IsFinite(result.end)) {
throw BinderException("RANGE with infinite bounds is not supported");
}

if (result->increment.months == 0 && result->increment.days == 0 && result->increment.micros == 0) {
if (result.increment.months == 0 && result.increment.days == 0 && result.increment.micros == 0) {
throw BinderException("interval cannot be 0!");
}
// all elements should point in the same direction
if (result->increment.months > 0 || result->increment.days > 0 || result->increment.micros > 0) {
if (result->increment.months < 0 || result->increment.days < 0 || result->increment.micros < 0) {
if (result.increment.months > 0 || result.increment.days > 0 || result.increment.micros > 0) {
if (result.increment.months < 0 || result.increment.days < 0 || result.increment.micros < 0) {
throw BinderException("RANGE with composite interval that has mixed signs is not supported");
}
result->greater_than_check = true;
if (result->start > result->end) {
result.greater_than_check = true;
if (result.start > result.end) {
throw BinderException(
"start is bigger than end, but increment is positive: cannot generate infinite series");
}
} else {
result->greater_than_check = false;
if (result->start < result->end) {
result.greater_than_check = false;
if (result.start < result.end) {
throw BinderException(
"start is smaller than end, but increment is negative: cannot generate infinite series");
}
}
return_types.push_back(inputs[0].type());
result.inclusive_bound = GENERATE_SERIES;
}

template <bool GENERATE_SERIES>
static unique_ptr<FunctionData> Bind(ClientContext &context, TableFunctionBindInput &input,
vector<LogicalType> &return_types, vector<string> &names) {
auto result = make_uniq<ICURangeBindData>(context);

return_types.push_back(LogicalType::TIMESTAMP_TZ);
if (GENERATE_SERIES) {
// generate_series has inclusive bounds on the RHS
result->inclusive_bound = true;
names.emplace_back("generate_series");
} else {
result->inclusive_bound = false;
names.emplace_back("range");
}
return std::move(result);
}

// Global state of the range scan: holds the scan position and a completion flag.
struct State : public GlobalTableFunctionState {
// Seeds the scan position with the first timestamp to emit (Init passes the bound start value).
explicit State(timestamp_t start_p) : current_state(start_p) {
}

// Next timestamp to be written to the output; advanced by the increment after each emitted row.
timestamp_t current_state;
// Set once the end bound has been reached; subsequent calls of the scan function return immediately.
bool finished = false;
};

static unique_ptr<GlobalTableFunctionState> Init(ClientContext &context, TableFunctionInitInput &input) {
auto &bind_data = input.bind_data->Cast<BindData>();
return make_uniq<State>(bind_data.start);
static unique_ptr<LocalTableFunctionState> RangeDateTimeLocalInit(ExecutionContext &context,
TableFunctionInitInput &input,
GlobalTableFunctionState *global_state) {
return make_uniq<ICURangeLocalState>();
}

static void ICUTableRangeFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
auto &bind_data = data_p.bind_data->Cast<BindData>();
template <bool GENERATE_SERIES>
static OperatorResultType ICUTableRangeFunction(ExecutionContext &context, TableFunctionInput &data_p,
DataChunk &input, DataChunk &output) {
auto &bind_data = data_p.bind_data->Cast<ICURangeBindData>();
auto &state = data_p.local_state->Cast<ICURangeLocalState>();
CalendarPtr calendar_ptr(bind_data.calendar->clone());
auto calendar = calendar_ptr.get();
auto &state = data_p.global_state->Cast<State>();
if (state.finished) {
return;
}

idx_t size = 0;
auto data = FlatVector::GetData<timestamp_t>(output.data[0]);
while (true) {
data[size++] = state.current_state;
state.current_state = ICUDateFunc::Add(calendar, state.current_state, bind_data.increment);
if (bind_data.Finished(state.current_state)) {
state.finished = true;
break;
if (!state.initialized_row) {
// initialize for the current input row
if (state.current_input_row >= input.size()) {
// ran out of rows
state.current_input_row = 0;
state.initialized_row = false;
return OperatorResultType::NEED_MORE_INPUT;
}
GenerateRangeDateTimeParameters<GENERATE_SERIES>(input, state.current_input_row, state);
state.initialized_row = true;
state.current_state = state.start;
}
idx_t size = 0;
auto data = FlatVector::GetData<timestamp_t>(output.data[0]);
while (true) {
if (state.Finished(state.current_state)) {
break;
}
data[size++] = state.current_state;
state.current_state = ICUDateFunc::Add(calendar, state.current_state, state.increment);
if (size >= STANDARD_VECTOR_SIZE) {
break;
}
}
if (size >= STANDARD_VECTOR_SIZE) {
break;
if (size == 0) {
// move to next row
state.current_input_row++;
state.initialized_row = false;
continue;
}
output.SetCardinality(size);
return OperatorResultType::HAVE_MORE_OUTPUT;
}
output.SetCardinality(size);
}

static void AddICUTableRangeFunction(DatabaseInstance &db) {
TableFunctionSet range("range");
range.AddFunction(TableFunction({LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP_TZ, LogicalType::INTERVAL},
ICUTableRangeFunction, Bind<false>, Init));
TableFunction range_function({LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP_TZ, LogicalType::INTERVAL},
nullptr, Bind<false>, nullptr, RangeDateTimeLocalInit);
range_function.in_out_function = ICUTableRangeFunction<false>;
range.AddFunction(range_function);
ExtensionUtil::AddFunctionOverload(db, range);

// generate_series: similar to range, but inclusive instead of exclusive bounds on the RHS
TableFunctionSet generate_series("generate_series");
generate_series.AddFunction(
TableFunction({LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP_TZ, LogicalType::INTERVAL},
ICUTableRangeFunction, Bind<true>, Init));
TableFunction generate_series_function(
{LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP_TZ, LogicalType::INTERVAL}, nullptr, Bind<true>, nullptr,
RangeDateTimeLocalInit);
generate_series_function.in_out_function = ICUTableRangeFunction<true>;
generate_series.AddFunction(generate_series_function);
ExtensionUtil::AddFunctionOverload(db, generate_series);
}
};
Expand Down
Loading

0 comments on commit 6d4d0e7

Please sign in to comment.