Skip to content

Commit

Permalink
Merge pull request #106 from lnkuiper/bump_manylinux
Browse files Browse the repository at this point in the history
Fix CI
  • Loading branch information
Mytherin authored Dec 5, 2024
2 parents 3d187b9 + ef53550 commit 01f8c77
Show file tree
Hide file tree
Showing 169 changed files with 27,917 additions and 10 deletions.
2 changes: 1 addition & 1 deletion .github/actions/manylinux_2014_setup/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,4 @@ runs:
uses: hendrikmuhs/ccache-action@v1.2.11 # Note: pinned due to GLIBC incompatibility in later releases
with:
key: ${{ github.job }}
save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb-java' }}
save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb-java' }}
6 changes: 3 additions & 3 deletions .github/workflows/Java.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
name: Java Linux (amd64)
runs-on: ubuntu-latest
container:
image: quay.io/pypa/manylinux2014_x86_64
image: quay.io/pypa/manylinux_2_28_x86_64
env:
GEN: ninja
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
Expand Down Expand Up @@ -71,7 +71,7 @@ jobs:
name: Java Linux (aarch64)
runs-on: ubuntu-latest
container:
image: ubuntu:18.04
image: ubuntu:20.04
needs: java-linux-amd64
env:
GEN: ninja
Expand Down Expand Up @@ -276,7 +276,7 @@ jobs:
runs-on: ubuntu-20.04
if: ${{ inputs.skip_tests != 'true' }}
needs: java-linux-amd64
container: quay.io/pypa/manylinux2014_x86_64
container: quay.io/pypa/manylinux_2_28_x86_64
env:
BUILD_JDBC: 1
GEN: ninja
Expand Down
6 changes: 3 additions & 3 deletions CMakeLists.txt

Large diffs are not rendered by default.

196 changes: 196 additions & 0 deletions src/duckdb/extension/core_functions/aggregate/algebraic/avg.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
#include "core_functions/aggregate/algebraic_functions.hpp"
#include "core_functions/aggregate/sum_helpers.hpp"
#include "duckdb/common/types/hugeint.hpp"
#include "duckdb/common/exception.hpp"
#include "duckdb/function/function_set.hpp"
#include "duckdb/planner/expression.hpp"

namespace duckdb {

// Running state of an average: the number of values folded in so far and their sum.
// T is the accumulator type used for the sum.
template <class T>
struct AvgState {
	uint64_t count;
	T value;

	// Resets the state to "no values seen".
	// Both fields are cleared so the state is fully defined after this call, whether or not
	// the enclosing aggregate operation zeroes the sum separately.
	void Initialize() {
		this->count = 0;
		this->value = T();
	}

	// Merges another partial state into this one by adding its count and its sum.
	void Combine(const AvgState<T> &other) {
		this->count += other.count;
		this->value += other.value;
	}
};

// Running state of an average over doubles that tracks a separate error term next to the sum.
// NOTE(review): `err` is presumably the compensation term maintained by KahanAddInternal;
// confirm against the helper's definition.
struct KahanAvgState {
	uint64_t count;
	double value;
	double err;

	// Resets the state to "no values seen".
	// All three fields are cleared so the state is fully defined after this call, whether or
	// not the enclosing aggregate operation zeroes the sum separately.
	void Initialize() {
		this->count = 0;
		this->value = 0.0;
		this->err = 0.0;
	}

	// Merges another partial state into this one: counts are added, then the other state's
	// sum and error term are each folded into this state's (value, err) pair, in that order.
	void Combine(const KahanAvgState &other) {
		this->count += other.count;
		KahanAddInternal(other.value, this->value, this->err);
		KahanAddInternal(other.err, this->value, this->err);
	}
};

// Bind data attached to the decimal average: stores the factor that is multiplied into the
// divisor when the average is computed.
struct AverageDecimalBindData : public FunctionData {
	explicit AverageDecimalBindData(double scale_p) : scale(scale_p) {
	}

	// Scaling factor applied on top of the row count
	double scale;

public:
	// Produces an independent bind data object carrying the same scale
	unique_ptr<FunctionData> Copy() const override {
		return make_uniq<AverageDecimalBindData>(this->scale);
	}

	// Two bind data objects are equal when their scales compare equal
	bool Equals(const FunctionData &other_p) const override {
		const auto &rhs = other_p.Cast<AverageDecimalBindData>();
		return this->scale == rhs.scale;
	}
};

// State-level hooks shared by the average operations: every call is forwarded to the state
// object itself or updates its row counter.
struct AverageSetOperation {
	// Delegates to the state's own Initialize()
	template <class STATE_TYPE>
	static void Initialize(STATE_TYPE &state) {
		state.Initialize();
	}

	// Folds `source` into `target` through the state's own Combine(); the input data is unused
	template <class STATE_TYPE>
	static void Combine(const STATE_TYPE &source, STATE_TYPE &target, AggregateInputData &) {
		target.Combine(source);
	}

	// Records that `count` additional values were added to the state
	template <class STATE_TYPE>
	static void AddValues(STATE_TYPE &state, idx_t count) {
		state.count += count;
	}
};

// Computes the value the accumulated sum is divided by (the divisor, despite the name):
// the row count converted to T, multiplied by the bind data's scale when bind data is present.
template <class T>
static T GetAverageDivident(uint64_t count, optional_ptr<FunctionData> bind_data) {
	T divisor = T(count);
	if (!bind_data) {
		// No bind data: the plain count is the divisor
		return divisor;
	}
	const auto &decimal_data = bind_data->Cast<AverageDecimalBindData>();
	divisor *= decimal_data.scale;
	return divisor;
}

// Average operation whose finalize step divides the accumulated sum, converted to double,
// by the (optionally scaled) row count.
struct IntegerAverageOperation : public BaseSumOperation<AverageSetOperation, RegularAdd> {
	// Writes the average into `target`, or marks the result NULL when no rows were aggregated
	template <class T, class STATE>
	static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) {
		if (state.count == 0) {
			finalize_data.ReturnNull();
			return;
		}
		const double divisor = GetAverageDivident<double>(state.count, finalize_data.input.bind_data);
		target = static_cast<double>(state.value) / divisor;
	}
};

// Average operation that adds inputs via AddToHugeint and finalizes in long double precision.
struct IntegerAverageOperationHugeint : public BaseSumOperation<AverageSetOperation, AddToHugeint> {
	// Writes the average into `target`, or marks the result NULL when no rows were aggregated
	template <class T, class STATE>
	static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) {
		if (state.count == 0) {
			finalize_data.ReturnNull();
			return;
		}
		const long double divisor = GetAverageDivident<long double>(state.count, finalize_data.input.bind_data);
		const auto total = Hugeint::Cast<long double>(state.value);
		target = total / divisor;
	}
};

// Average operation that adds inputs via HugeintAdd and finalizes in long double precision.
struct HugeintAverageOperation : public BaseSumOperation<AverageSetOperation, HugeintAdd> {
	// Writes the average into `target`, or marks the result NULL when no rows were aggregated
	template <class T, class STATE>
	static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) {
		if (state.count == 0) {
			finalize_data.ReturnNull();
			return;
		}
		const long double divisor = GetAverageDivident<long double>(state.count, finalize_data.input.bind_data);
		const auto total = Hugeint::Cast<long double>(state.value);
		target = total / divisor;
	}
};

// Average operation whose finalize step divides the accumulated sum directly by the row count.
struct NumericAverageOperation : public BaseSumOperation<AverageSetOperation, RegularAdd> {
	// Writes sum / count into `target`, or marks the result NULL when no rows were aggregated
	template <class T, class STATE>
	static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) {
		if (state.count == 0) {
			finalize_data.ReturnNull();
			return;
		}
		target = state.value / state.count;
	}
};

// Average operation that adds inputs via KahanAdd; the finalize step divides the sum and the
// error term by the row count separately and adds the two quotients.
struct KahanAverageOperation : public BaseSumOperation<AverageSetOperation, KahanAdd> {
	// Writes the average into `target`, or marks the result NULL when no rows were aggregated
	template <class T, class STATE>
	static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) {
		if (state.count == 0) {
			finalize_data.ReturnNull();
			return;
		}
		// Kept as one expression so the order of floating point operations is unchanged
		target = (state.value / state.count) + (state.err / state.count);
	}
};

// Returns the average aggregate for one of the supported integer physical types.
// Every variant returns DOUBLE. INT16 sums into an int64_t state, the other types into a
// hugeint_t state. Any other physical type raises an InternalException.
AggregateFunction GetAverageAggregate(PhysicalType type) {
	using NarrowState = AvgState<int64_t>;
	using WideState = AvgState<hugeint_t>;

	if (type == PhysicalType::INT16) {
		return AggregateFunction::UnaryAggregate<NarrowState, int16_t, double, IntegerAverageOperation>(
		    LogicalType::SMALLINT, LogicalType::DOUBLE);
	}
	if (type == PhysicalType::INT32) {
		return AggregateFunction::UnaryAggregate<WideState, int32_t, double, IntegerAverageOperationHugeint>(
		    LogicalType::INTEGER, LogicalType::DOUBLE);
	}
	if (type == PhysicalType::INT64) {
		return AggregateFunction::UnaryAggregate<WideState, int64_t, double, IntegerAverageOperationHugeint>(
		    LogicalType::BIGINT, LogicalType::DOUBLE);
	}
	if (type == PhysicalType::INT128) {
		return AggregateFunction::UnaryAggregate<WideState, hugeint_t, double, HugeintAverageOperation>(
		    LogicalType::HUGEINT, LogicalType::DOUBLE);
	}
	throw InternalException("Unimplemented average aggregate");
}

// Bind callback for the DECIMAL overload of avg.
// Replaces `function` with the integer average matching the decimal's internal physical type,
// restores its name, argument type and DOUBLE return type, and returns bind data holding
// 10^scale (as a double) for the decimal argument.
unique_ptr<FunctionData> BindDecimalAvg(ClientContext &context, AggregateFunction &function,
                                        vector<unique_ptr<Expression>> &arguments) {
	const auto decimal_type = arguments[0]->return_type;

	// Swap in the concrete implementation, then re-apply the decimal-facing signature
	function = GetAverageAggregate(decimal_type.InternalType());
	function.name = "avg";
	function.arguments[0] = decimal_type;
	function.return_type = LogicalType::DOUBLE;

	const auto decimal_scale = DecimalType::GetScale(decimal_type);
	const auto scale_factor = Hugeint::Cast<double>(Hugeint::POWERS_OF_TEN[decimal_scale]);
	return make_uniq<AverageDecimalBindData>(scale_factor);
}

// Assembles the overload set for avg: a DECIMAL overload resolved at bind time, one overload
// per supported integer physical type, and a DOUBLE overload. Registration order is
// DECIMAL, INT16, INT32, INT64, INT128, DOUBLE.
AggregateFunctionSet AvgFun::GetFunctions() {
	AggregateFunctionSet avg;

	// DECIMAL: no callbacks are set here, BindDecimalAvg installs the implementation
	avg.AddFunction(AggregateFunction({LogicalTypeId::DECIMAL}, LogicalTypeId::DECIMAL, nullptr, nullptr, nullptr,
	                                  nullptr, nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr,
	                                  BindDecimalAvg));

	// Integer overloads
	for (const auto physical_type :
	     {PhysicalType::INT16, PhysicalType::INT32, PhysicalType::INT64, PhysicalType::INT128}) {
		avg.AddFunction(GetAverageAggregate(physical_type));
	}

	// DOUBLE overload
	using DoubleState = AvgState<double>;
	avg.AddFunction(AggregateFunction::UnaryAggregate<DoubleState, double, double, NumericAverageOperation>(
	    LogicalType::DOUBLE, LogicalType::DOUBLE));
	return avg;
}

// Builds the aggregate exposed by FAvgFun: DOUBLE input, DOUBLE result, accumulated in a
// KahanAvgState and finalized by KahanAverageOperation.
AggregateFunction FAvgFun::GetFunction() {
	using STATE = KahanAvgState;
	using OP = KahanAverageOperation;
	const LogicalType double_type = LogicalType::DOUBLE;
	return AggregateFunction::UnaryAggregate<STATE, double, double, OP>(double_type, double_type);
}

} // namespace duckdb
13 changes: 13 additions & 0 deletions src/duckdb/extension/core_functions/aggregate/algebraic/corr.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "core_functions/aggregate/algebraic_functions.hpp"
#include "core_functions/aggregate/algebraic/covar.hpp"
#include "core_functions/aggregate/algebraic/stddev.hpp"
#include "core_functions/aggregate/algebraic/corr.hpp"
#include "duckdb/function/function_set.hpp"

namespace duckdb {

// Builds the aggregate exposed by CorrFun: two DOUBLE inputs, DOUBLE result, using CorrState
// and CorrOperation.
AggregateFunction CorrFun::GetFunction() {
	using STATE = CorrState;
	using OP = CorrOperation;
	const LogicalType double_type = LogicalType::DOUBLE;
	return AggregateFunction::BinaryAggregate<STATE, double, double, double, OP>(double_type, double_type,
	                                                                             double_type);
}
} // namespace duckdb
17 changes: 17 additions & 0 deletions src/duckdb/extension/core_functions/aggregate/algebraic/covar.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include "core_functions/aggregate/algebraic_functions.hpp"
#include "duckdb/common/types/null_value.hpp"
#include "core_functions/aggregate/algebraic/covar.hpp"

namespace duckdb {

// Builds the aggregate exposed by CovarPopFun: two DOUBLE inputs, DOUBLE result, using
// CovarState and CovarPopOperation.
AggregateFunction CovarPopFun::GetFunction() {
	using STATE = CovarState;
	using OP = CovarPopOperation;
	const LogicalType double_type = LogicalType::DOUBLE;
	return AggregateFunction::BinaryAggregate<STATE, double, double, double, OP>(double_type, double_type,
	                                                                             double_type);
}

// Builds the aggregate exposed by CovarSampFun: two DOUBLE inputs, DOUBLE result, using
// CovarState and CovarSampOperation.
AggregateFunction CovarSampFun::GetFunction() {
	using STATE = CovarState;
	using OP = CovarSampOperation;
	const LogicalType double_type = LogicalType::DOUBLE;
	return AggregateFunction::BinaryAggregate<STATE, double, double, double, OP>(double_type, double_type,
	                                                                             double_type);
}

} // namespace duckdb
34 changes: 34 additions & 0 deletions src/duckdb/extension/core_functions/aggregate/algebraic/stddev.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#include "core_functions/aggregate/algebraic_functions.hpp"
#include "duckdb/common/vector_operations/vector_operations.hpp"
#include "duckdb/function/function_set.hpp"
#include "core_functions/aggregate/algebraic/stddev.hpp"
#include <cmath>

namespace duckdb {

// Builds the aggregate exposed by StdDevSampFun: DOUBLE input, DOUBLE result, using
// StddevState and STDDevSampOperation.
AggregateFunction StdDevSampFun::GetFunction() {
	using OP = STDDevSampOperation;
	const LogicalType double_type = LogicalType::DOUBLE;
	return AggregateFunction::UnaryAggregate<StddevState, double, double, OP>(double_type, double_type);
}

// Builds the aggregate exposed by StdDevPopFun: DOUBLE input, DOUBLE result, using
// StddevState and STDDevPopOperation.
AggregateFunction StdDevPopFun::GetFunction() {
	using OP = STDDevPopOperation;
	const LogicalType double_type = LogicalType::DOUBLE;
	return AggregateFunction::UnaryAggregate<StddevState, double, double, OP>(double_type, double_type);
}

// Builds the aggregate exposed by VarPopFun: DOUBLE input, DOUBLE result, using
// StddevState and VarPopOperation.
AggregateFunction VarPopFun::GetFunction() {
	using OP = VarPopOperation;
	const LogicalType double_type = LogicalType::DOUBLE;
	return AggregateFunction::UnaryAggregate<StddevState, double, double, OP>(double_type, double_type);
}

// Builds the aggregate exposed by VarSampFun: DOUBLE input, DOUBLE result, using
// StddevState and VarSampOperation.
AggregateFunction VarSampFun::GetFunction() {
	using OP = VarSampOperation;
	const LogicalType double_type = LogicalType::DOUBLE;
	return AggregateFunction::UnaryAggregate<StddevState, double, double, OP>(double_type, double_type);
}

// Builds the aggregate exposed by StandardErrorOfTheMeanFun: DOUBLE input, DOUBLE result,
// using StddevState and StandardErrorOfTheMeanOperation.
AggregateFunction StandardErrorOfTheMeanFun::GetFunction() {
	using OP = StandardErrorOfTheMeanOperation;
	const LogicalType double_type = LogicalType::DOUBLE;
	return AggregateFunction::UnaryAggregate<StddevState, double, double, OP>(double_type, double_type);
}

} // namespace duckdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#include "duckdb/common/exception.hpp"
#include "duckdb/common/types/hash.hpp"
#include "duckdb/common/types/hyperloglog.hpp"
#include "core_functions/aggregate/distributive_functions.hpp"
#include "duckdb/function/function_set.hpp"
#include "duckdb/planner/expression/bound_aggregate_expression.hpp"
#include "hyperloglog.hpp"

namespace duckdb {

// Algorithms from
// "New cardinality estimation algorithms for HyperLogLog sketches"
// Otmar Ertl, arXiv:1702.01284
// Aggregate state used by the ApproxCountDistinct functions in this file:
// wraps a single HyperLogLog sketch that is updated, merged and counted.
struct ApproxDistinctCountState {
HyperLogLog hll;
};

// State-level operations of the approximate distinct count aggregate.
struct ApproxCountDistinctFunction {
	// Constructs a value-initialized state in place inside the memory `state` refers to
	template <class STATE_TYPE>
	static void Initialize(STATE_TYPE &state) {
		new (&state) STATE_TYPE();
	}

	// Merges the sketch of `source` into the sketch of `target`; the input data is unused
	template <class STATE_TYPE, class OP>
	static void Combine(const STATE_TYPE &source, STATE_TYPE &target, AggregateInputData &) {
		auto &merged = target.hll;
		merged.Merge(source.hll);
	}

	// Stores the sketch's count, cast to the result type, into `target`
	template <class T, class STATE_TYPE>
	static void Finalize(STATE_TYPE &state, T &target, AggregateFinalizeData &finalize_data) {
		const auto estimate = state.hll.Count();
		target = UnsafeNumericCast<T>(estimate);
	}

	// Always reports true
	static bool IgnoreNull() {
		return true;
	}
};

// Simple-update callback: feeds `count` rows of the single input vector into the one state
// located at `state`.
// Throws an InternalException when `count` exceeds STANDARD_VECTOR_SIZE.
static void ApproxCountDistinctSimpleUpdateFunction(Vector inputs[], AggregateInputData &, idx_t input_count,
                                                    data_ptr_t state, idx_t count) {
	D_ASSERT(input_count == 1);
	if (count > STANDARD_VECTOR_SIZE) {
		throw InternalException("ApproxCountDistinct - count must be at most vector size");
	}

	// Hash every input row into a scratch vector
	auto &source = inputs[0];
	Vector hashes(LogicalType::HASH, count);
	VectorOperations::Hash(source, hashes, count);

	// Hand the input and its hashes to the state's sketch
	auto &sketch = reinterpret_cast<ApproxDistinctCountState *>(state)->hll;
	sketch.Update(source, hashes, count);
}

// Scatter-update callback: for every valid input row, inserts the row's hash into the sketch
// of the state that `state_vector` holds for that row. Invalid (NULL) rows are skipped.
// Throws an InternalException when `count` exceeds STANDARD_VECTOR_SIZE.
static void ApproxCountDistinctUpdateFunction(Vector inputs[], AggregateInputData &, idx_t input_count,
                                              Vector &state_vector, idx_t count) {
	D_ASSERT(input_count == 1);
	auto &source = inputs[0];
	UnifiedVectorFormat input_format;
	source.ToUnifiedFormat(count, input_format);

	if (count > STANDARD_VECTOR_SIZE) {
		throw InternalException("ApproxCountDistinct - count must be at most vector size");
	}

	// Hash every input row into a scratch vector
	Vector hash_vec(LogicalType::HASH, count);
	VectorOperations::Hash(source, hash_vec, count);

	// Per-row state pointers
	UnifiedVectorFormat state_format;
	state_vector.ToUnifiedFormat(count, state_format);
	const auto state_ptrs = UnifiedVectorFormat::GetDataNoConst<ApproxDistinctCountState *>(state_format);

	// Per-row hash values
	UnifiedVectorFormat hash_format;
	hash_vec.ToUnifiedFormat(count, hash_format);
	const auto *hash_values = UnifiedVectorFormat::GetData<hash_t>(hash_format);

	for (idx_t row = 0; row < count; row++) {
		if (!input_format.validity.RowIsValid(input_format.sel->get_index(row))) {
			continue;
		}
		auto target_state = state_ptrs[state_format.sel->get_index(row)];
		const auto row_hash = hash_values[hash_format.sel->get_index(row)];
		target_state->hll.InsertElement(row_hash);
	}
}

// Assembles the approximate distinct count aggregate for the given input type.
// The result type is BIGINT and null handling is set to SPECIAL_HANDLING.
AggregateFunction GetApproxCountDistinctFunction(const LogicalType &input_type) {
	using STATE = ApproxDistinctCountState;
	using OP = ApproxCountDistinctFunction;

	AggregateFunction result({input_type}, LogicalTypeId::BIGINT, AggregateFunction::StateSize<STATE>,
	                         AggregateFunction::StateInitialize<STATE, OP>, ApproxCountDistinctUpdateFunction,
	                         AggregateFunction::StateCombine<STATE, OP>,
	                         AggregateFunction::StateFinalize<STATE, int64_t, OP>,
	                         ApproxCountDistinctSimpleUpdateFunction);
	result.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
	return result;
}

// Returns the approximate distinct count aggregate declared with an ANY input type.
AggregateFunction ApproxCountDistinctFun::GetFunction() {
	const LogicalType any_type = LogicalType::ANY;
	return GetApproxCountDistinctFunction(any_type);
}

} // namespace duckdb
Loading

0 comments on commit 01f8c77

Please sign in to comment.