Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix CI #106

Merged
merged 7 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/manylinux_2014_setup/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,4 @@ runs:
uses: hendrikmuhs/[email protected] # Note: pinned due to GLIBC incompatibility in later releases
with:
key: ${{ github.job }}
save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb-java' }}
save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb-java' }}
6 changes: 3 additions & 3 deletions .github/workflows/Java.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
name: Java Linux (amd64)
runs-on: ubuntu-latest
container:
image: quay.io/pypa/manylinux2014_x86_64
image: quay.io/pypa/manylinux_2_28_x86_64
env:
GEN: ninja
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
Expand Down Expand Up @@ -71,7 +71,7 @@ jobs:
name: Java Linux (aarch64)
runs-on: ubuntu-latest
container:
image: ubuntu:18.04
image: ubuntu:20.04
needs: java-linux-amd64
env:
GEN: ninja
Expand Down Expand Up @@ -276,7 +276,7 @@ jobs:
runs-on: ubuntu-20.04
if: ${{ inputs.skip_tests != 'true' }}
needs: java-linux-amd64
container: quay.io/pypa/manylinux2014_x86_64
container: quay.io/pypa/manylinux_2_28_x86_64
env:
BUILD_JDBC: 1
GEN: ninja
Expand Down
6 changes: 3 additions & 3 deletions CMakeLists.txt

Large diffs are not rendered by default.

196 changes: 196 additions & 0 deletions src/duckdb/extension/core_functions/aggregate/algebraic/avg.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
#include "core_functions/aggregate/algebraic_functions.hpp"
#include "core_functions/aggregate/sum_helpers.hpp"
#include "duckdb/common/types/hugeint.hpp"
#include "duckdb/common/exception.hpp"
#include "duckdb/function/function_set.hpp"
#include "duckdb/planner/expression.hpp"

namespace duckdb {

template <class T>
struct AvgState {
uint64_t count;
T value;

void Initialize() {
this->count = 0;
}

void Combine(const AvgState<T> &other) {
this->count += other.count;
this->value += other.value;
}
};

struct KahanAvgState {
uint64_t count;
double value;
double err;

void Initialize() {
this->count = 0;
this->err = 0.0;
}

void Combine(const KahanAvgState &other) {
this->count += other.count;
KahanAddInternal(other.value, this->value, this->err);
KahanAddInternal(other.err, this->value, this->err);
}
};

struct AverageDecimalBindData : public FunctionData {
explicit AverageDecimalBindData(double scale) : scale(scale) {
}

double scale;

public:
unique_ptr<FunctionData> Copy() const override {
return make_uniq<AverageDecimalBindData>(scale);
};

bool Equals(const FunctionData &other_p) const override {
auto &other = other_p.Cast<AverageDecimalBindData>();
return scale == other.scale;
}
};

struct AverageSetOperation {
template <class STATE>
static void Initialize(STATE &state) {
state.Initialize();
}
template <class STATE>
static void Combine(const STATE &source, STATE &target, AggregateInputData &) {
target.Combine(source);
}
template <class STATE>
static void AddValues(STATE &state, idx_t count) {
state.count += count;
}
};

template <class T>
static T GetAverageDivident(uint64_t count, optional_ptr<FunctionData> bind_data) {
T divident = T(count);
if (bind_data) {
auto &avg_bind_data = bind_data->Cast<AverageDecimalBindData>();
divident *= avg_bind_data.scale;
}
return divident;
}

struct IntegerAverageOperation : public BaseSumOperation<AverageSetOperation, RegularAdd> {
template <class T, class STATE>
static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) {
if (state.count == 0) {
finalize_data.ReturnNull();
} else {
double divident = GetAverageDivident<double>(state.count, finalize_data.input.bind_data);
target = double(state.value) / divident;
}
}
};

struct IntegerAverageOperationHugeint : public BaseSumOperation<AverageSetOperation, AddToHugeint> {
template <class T, class STATE>
static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) {
if (state.count == 0) {
finalize_data.ReturnNull();
} else {
long double divident = GetAverageDivident<long double>(state.count, finalize_data.input.bind_data);
target = Hugeint::Cast<long double>(state.value) / divident;
}
}
};

struct HugeintAverageOperation : public BaseSumOperation<AverageSetOperation, HugeintAdd> {
template <class T, class STATE>
static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) {
if (state.count == 0) {
finalize_data.ReturnNull();
} else {
long double divident = GetAverageDivident<long double>(state.count, finalize_data.input.bind_data);
target = Hugeint::Cast<long double>(state.value) / divident;
}
}
};

struct NumericAverageOperation : public BaseSumOperation<AverageSetOperation, RegularAdd> {
template <class T, class STATE>
static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) {
if (state.count == 0) {
finalize_data.ReturnNull();
} else {
target = state.value / state.count;
}
}
};

struct KahanAverageOperation : public BaseSumOperation<AverageSetOperation, KahanAdd> {
template <class T, class STATE>
static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) {
if (state.count == 0) {
finalize_data.ReturnNull();
} else {
target = (state.value / state.count) + (state.err / state.count);
}
}
};

AggregateFunction GetAverageAggregate(PhysicalType type) {
switch (type) {
case PhysicalType::INT16: {
return AggregateFunction::UnaryAggregate<AvgState<int64_t>, int16_t, double, IntegerAverageOperation>(
LogicalType::SMALLINT, LogicalType::DOUBLE);
}
case PhysicalType::INT32: {
return AggregateFunction::UnaryAggregate<AvgState<hugeint_t>, int32_t, double, IntegerAverageOperationHugeint>(
LogicalType::INTEGER, LogicalType::DOUBLE);
}
case PhysicalType::INT64: {
return AggregateFunction::UnaryAggregate<AvgState<hugeint_t>, int64_t, double, IntegerAverageOperationHugeint>(
LogicalType::BIGINT, LogicalType::DOUBLE);
}
case PhysicalType::INT128: {
return AggregateFunction::UnaryAggregate<AvgState<hugeint_t>, hugeint_t, double, HugeintAverageOperation>(
LogicalType::HUGEINT, LogicalType::DOUBLE);
}
default:
throw InternalException("Unimplemented average aggregate");
}
}

unique_ptr<FunctionData> BindDecimalAvg(ClientContext &context, AggregateFunction &function,
vector<unique_ptr<Expression>> &arguments) {
auto decimal_type = arguments[0]->return_type;
function = GetAverageAggregate(decimal_type.InternalType());
function.name = "avg";
function.arguments[0] = decimal_type;
function.return_type = LogicalType::DOUBLE;
return make_uniq<AverageDecimalBindData>(
Hugeint::Cast<double>(Hugeint::POWERS_OF_TEN[DecimalType::GetScale(decimal_type)]));
}

AggregateFunctionSet AvgFun::GetFunctions() {
AggregateFunctionSet avg;

avg.AddFunction(AggregateFunction({LogicalTypeId::DECIMAL}, LogicalTypeId::DECIMAL, nullptr, nullptr, nullptr,
nullptr, nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr,
BindDecimalAvg));
avg.AddFunction(GetAverageAggregate(PhysicalType::INT16));
avg.AddFunction(GetAverageAggregate(PhysicalType::INT32));
avg.AddFunction(GetAverageAggregate(PhysicalType::INT64));
avg.AddFunction(GetAverageAggregate(PhysicalType::INT128));
avg.AddFunction(AggregateFunction::UnaryAggregate<AvgState<double>, double, double, NumericAverageOperation>(
LogicalType::DOUBLE, LogicalType::DOUBLE));
return avg;
}

AggregateFunction FAvgFun::GetFunction() {
return AggregateFunction::UnaryAggregate<KahanAvgState, double, double, KahanAverageOperation>(LogicalType::DOUBLE,
LogicalType::DOUBLE);
}

} // namespace duckdb
13 changes: 13 additions & 0 deletions src/duckdb/extension/core_functions/aggregate/algebraic/corr.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "core_functions/aggregate/algebraic_functions.hpp"
#include "core_functions/aggregate/algebraic/covar.hpp"
#include "core_functions/aggregate/algebraic/stddev.hpp"
#include "core_functions/aggregate/algebraic/corr.hpp"
#include "duckdb/function/function_set.hpp"

namespace duckdb {

AggregateFunction CorrFun::GetFunction() {
return AggregateFunction::BinaryAggregate<CorrState, double, double, double, CorrOperation>(
LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE);
}
} // namespace duckdb
17 changes: 17 additions & 0 deletions src/duckdb/extension/core_functions/aggregate/algebraic/covar.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include "core_functions/aggregate/algebraic_functions.hpp"
#include "duckdb/common/types/null_value.hpp"
#include "core_functions/aggregate/algebraic/covar.hpp"

namespace duckdb {

AggregateFunction CovarPopFun::GetFunction() {
return AggregateFunction::BinaryAggregate<CovarState, double, double, double, CovarPopOperation>(
LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE);
}

AggregateFunction CovarSampFun::GetFunction() {
return AggregateFunction::BinaryAggregate<CovarState, double, double, double, CovarSampOperation>(
LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE);
}

} // namespace duckdb
34 changes: 34 additions & 0 deletions src/duckdb/extension/core_functions/aggregate/algebraic/stddev.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#include "core_functions/aggregate/algebraic_functions.hpp"
#include "duckdb/common/vector_operations/vector_operations.hpp"
#include "duckdb/function/function_set.hpp"
#include "core_functions/aggregate/algebraic/stddev.hpp"
#include <cmath>

namespace duckdb {

AggregateFunction StdDevSampFun::GetFunction() {
return AggregateFunction::UnaryAggregate<StddevState, double, double, STDDevSampOperation>(LogicalType::DOUBLE,
LogicalType::DOUBLE);
}

AggregateFunction StdDevPopFun::GetFunction() {
return AggregateFunction::UnaryAggregate<StddevState, double, double, STDDevPopOperation>(LogicalType::DOUBLE,
LogicalType::DOUBLE);
}

AggregateFunction VarPopFun::GetFunction() {
return AggregateFunction::UnaryAggregate<StddevState, double, double, VarPopOperation>(LogicalType::DOUBLE,
LogicalType::DOUBLE);
}

AggregateFunction VarSampFun::GetFunction() {
return AggregateFunction::UnaryAggregate<StddevState, double, double, VarSampOperation>(LogicalType::DOUBLE,
LogicalType::DOUBLE);
}

AggregateFunction StandardErrorOfTheMeanFun::GetFunction() {
return AggregateFunction::UnaryAggregate<StddevState, double, double, StandardErrorOfTheMeanOperation>(
LogicalType::DOUBLE, LogicalType::DOUBLE);
}

} // namespace duckdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#include "duckdb/common/exception.hpp"
#include "duckdb/common/types/hash.hpp"
#include "duckdb/common/types/hyperloglog.hpp"
#include "core_functions/aggregate/distributive_functions.hpp"
#include "duckdb/function/function_set.hpp"
#include "duckdb/planner/expression/bound_aggregate_expression.hpp"
#include "hyperloglog.hpp"

namespace duckdb {

// Algorithms from
// "New cardinality estimation algorithms for HyperLogLog sketches"
// Otmar Ertl, arXiv:1702.01284
struct ApproxDistinctCountState {
HyperLogLog hll;
};

struct ApproxCountDistinctFunction {
template <class STATE>
static void Initialize(STATE &state) {
new (&state) STATE();
}

template <class STATE, class OP>
static void Combine(const STATE &source, STATE &target, AggregateInputData &) {
target.hll.Merge(source.hll);
}

template <class T, class STATE>
static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) {
target = UnsafeNumericCast<T>(state.hll.Count());
}

static bool IgnoreNull() {
return true;
}
};

static void ApproxCountDistinctSimpleUpdateFunction(Vector inputs[], AggregateInputData &, idx_t input_count,
data_ptr_t state, idx_t count) {
D_ASSERT(input_count == 1);
auto &input = inputs[0];

if (count > STANDARD_VECTOR_SIZE) {
throw InternalException("ApproxCountDistinct - count must be at most vector size");
}
Vector hash_vec(LogicalType::HASH, count);
VectorOperations::Hash(input, hash_vec, count);

auto agg_state = reinterpret_cast<ApproxDistinctCountState *>(state);
agg_state->hll.Update(input, hash_vec, count);
}

static void ApproxCountDistinctUpdateFunction(Vector inputs[], AggregateInputData &, idx_t input_count,
Vector &state_vector, idx_t count) {
D_ASSERT(input_count == 1);
auto &input = inputs[0];
UnifiedVectorFormat idata;
input.ToUnifiedFormat(count, idata);

if (count > STANDARD_VECTOR_SIZE) {
throw InternalException("ApproxCountDistinct - count must be at most vector size");
}
Vector hash_vec(LogicalType::HASH, count);
VectorOperations::Hash(input, hash_vec, count);

UnifiedVectorFormat sdata;
state_vector.ToUnifiedFormat(count, sdata);
const auto states = UnifiedVectorFormat::GetDataNoConst<ApproxDistinctCountState *>(sdata);

UnifiedVectorFormat hdata;
hash_vec.ToUnifiedFormat(count, hdata);
const auto *hashes = UnifiedVectorFormat::GetData<hash_t>(hdata);
for (idx_t i = 0; i < count; i++) {
if (idata.validity.RowIsValid(idata.sel->get_index(i))) {
auto agg_state = states[sdata.sel->get_index(i)];
const auto hash = hashes[hdata.sel->get_index(i)];
agg_state->hll.InsertElement(hash);
}
}
}

AggregateFunction GetApproxCountDistinctFunction(const LogicalType &input_type) {
auto fun = AggregateFunction(
{input_type}, LogicalTypeId::BIGINT, AggregateFunction::StateSize<ApproxDistinctCountState>,
AggregateFunction::StateInitialize<ApproxDistinctCountState, ApproxCountDistinctFunction>,
ApproxCountDistinctUpdateFunction,
AggregateFunction::StateCombine<ApproxDistinctCountState, ApproxCountDistinctFunction>,
AggregateFunction::StateFinalize<ApproxDistinctCountState, int64_t, ApproxCountDistinctFunction>,
ApproxCountDistinctSimpleUpdateFunction);
fun.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
return fun;
}

AggregateFunction ApproxCountDistinctFun::GetFunction() {
return GetApproxCountDistinctFunction(LogicalType::ANY);
}

} // namespace duckdb
Loading
Loading