Skip to content

Commit

Permalink
Update vendored DuckDB sources to cb7af4d
Browse files Browse the repository at this point in the history
  • Loading branch information
duckdblabs-bot committed Oct 28, 2024
1 parent cb7af4d commit 08f2a0f
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 17 deletions.
39 changes: 32 additions & 7 deletions src/duckdb/src/core_functions/aggregate/holistic/approx_top_k.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ struct ApproxTopKValue {
uint32_t capacity = 0;
};

struct ApproxTopKState {
struct InternalApproxTopKState {
// the top-k data structure has two components
// a list of k values sorted on "count" (i.e. values[0] has the lowest count)
// a lookup map: string_t -> idx in "values" array
Expand Down Expand Up @@ -169,15 +169,34 @@ struct ApproxTopKState {
}
};

//! Aggregate state handle: holds only a pointer to the actual top-k state,
//! which is allocated on demand by the mutable accessor.
struct ApproxTopKState {
	//! Pointer to the heap-allocated internal state (nullptr while nothing has been allocated)
	InternalApproxTopKState *state;

	//! Mutable access - allocates the internal state on first use
	InternalApproxTopKState &GetState() {
		if (state == nullptr) {
			state = new InternalApproxTopKState();
		}
		return *state;
	}

	//! Read-only access - never allocates; throws an InternalException if no state has been allocated
	const InternalApproxTopKState &GetState() const {
		if (state != nullptr) {
			return *state;
		}
		throw InternalException("No state available");
	}
};

struct ApproxTopKOperation {
template <class STATE>
static void Initialize(STATE &state) {
new (&state) STATE();
state.state = nullptr;
}

template <class TYPE, class STATE>
static void Operation(STATE &state, const TYPE &input, AggregateInputData &aggr_input, Vector &top_k_vector,
static void Operation(STATE &aggr_state, const TYPE &input, AggregateInputData &aggr_input, Vector &top_k_vector,
idx_t offset, idx_t count) {
auto &state = aggr_state.GetState();
if (state.values.empty()) {
static constexpr int64_t MAX_APPROX_K = 1000000;
// not initialized yet - initialize the K value and set all counters to 0
Expand Down Expand Up @@ -208,7 +227,13 @@ struct ApproxTopKOperation {
}

template <class STATE, class OP>
static void Combine(const STATE &source, STATE &target, AggregateInputData &aggr_input) {
static void Combine(const STATE &aggr_source, STATE &aggr_target, AggregateInputData &aggr_input) {
if (!aggr_source.state) {
// source state is empty
return;
}
auto &source = aggr_source.GetState();
auto &target = aggr_target.GetState();
if (source.values.empty()) {
// source is empty
return;
Expand Down Expand Up @@ -279,7 +304,7 @@ struct ApproxTopKOperation {

template <class STATE>
static void Destroy(STATE &state, AggregateInputData &aggr_input_data) {
state.~STATE();
delete state.state;
}

static bool IgnoreNull() {
Expand Down Expand Up @@ -324,7 +349,7 @@ static void ApproxTopKFinalize(Vector &state_vector, AggregateInputData &, Vecto
idx_t new_entries = 0;
// figure out how much space we need
for (idx_t i = 0; i < count; i++) {
auto &state = *states[sdata.sel->get_index(i)];
auto &state = states[sdata.sel->get_index(i)]->GetState();
if (state.values.empty()) {
continue;
}
Expand All @@ -340,7 +365,7 @@ static void ApproxTopKFinalize(Vector &state_vector, AggregateInputData &, Vecto
idx_t current_offset = old_len;
for (idx_t i = 0; i < count; i++) {
const auto rid = i + offset;
auto &state = *states[sdata.sel->get_index(i)];
auto &state = states[sdata.sel->get_index(i)]->GetState();
if (state.values.empty()) {
mask.SetInvalid(rid);
continue;
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "3-dev69"
#define DUCKDB_PATCH_VERSION "3-dev75"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 1
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.1.3-dev69"
#define DUCKDB_VERSION "v1.1.3-dev75"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "e791508e9b"
#define DUCKDB_SOURCE_ID "c73cfca650"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,6 @@ class DistinctStatistics {
static constexpr double INTEGRAL_SAMPLE_RATE = 0.3;
//! For concurrent access
mutable mutex lock;
//! Preallocated vector for hashes
Vector hash_vec;
};

} // namespace duckdb
8 changes: 3 additions & 5 deletions src/duckdb/src/storage/statistics/distinct_statistics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,11 @@

namespace duckdb {

DistinctStatistics::DistinctStatistics()
: log(make_uniq<HyperLogLog>()), sample_count(0), total_count(0),
hash_vec(LogicalType::HASH, STANDARD_VECTOR_SIZE) {
DistinctStatistics::DistinctStatistics() : log(make_uniq<HyperLogLog>()), sample_count(0), total_count(0) {
}

DistinctStatistics::DistinctStatistics(unique_ptr<HyperLogLog> log, idx_t sample_count, idx_t total_count)
: log(std::move(log)), sample_count(sample_count), total_count(total_count),
hash_vec(LogicalType::HASH, STANDARD_VECTOR_SIZE) {
: log(std::move(log)), sample_count(sample_count), total_count(total_count) {
}

unique_ptr<DistinctStatistics> DistinctStatistics::Copy() const {
Expand All @@ -41,6 +38,7 @@ void DistinctStatistics::Update(Vector &v, idx_t count, bool sample) {
sample_count += count;

lock_guard<mutex> guard(lock);
Vector hash_vec(LogicalType::HASH, count);
VectorOperations::Hash(v, hash_vec, count);

UnifiedVectorFormat vdata;
Expand Down

0 comments on commit 08f2a0f

Please sign in to comment.