Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vendor: Update vendored sources to duckdb/duckdb@5d48575b112de7ad2759bad4b65bc86dd21185a2 #751

Merged
merged 1 commit into from
Dec 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ void ColumnDataCollectionSegment::ReadChunk(idx_t chunk_index, ChunkManagementSt
const vector<column_t> &column_ids) {
D_ASSERT(chunk.ColumnCount() == column_ids.size());
D_ASSERT(state.properties != ColumnDataScanProperties::INVALID);
chunk.Reset();
InitializeChunkState(chunk_index, state);
auto &chunk_meta = chunk_data[chunk_index];
for (idx_t i = 0; i < column_ids.size(); i++) {
Expand Down
2 changes: 1 addition & 1 deletion src/duckdb/src/execution/aggregate_hashtable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ void GroupedAggregateHashTable::Verify() {
}

void GroupedAggregateHashTable::ClearPointerTable() {
std::fill_n(entries, capacity, ht_entry_t::GetEmptyEntry());
std::fill_n(entries, capacity, ht_entry_t());
}

void GroupedAggregateHashTable::ResetCount() {
Expand Down
172 changes: 78 additions & 94 deletions src/duckdb/src/execution/join_hashtable.cpp

Large diffs are not rendered by default.

24 changes: 10 additions & 14 deletions src/duckdb/src/execution/operator/join/physical_hash_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,18 +331,15 @@ SinkResultType PhysicalHashJoin::Sink(ExecutionContext &context, DataChunk &chun
filter_pushdown->Sink(lstate.join_keys, *lstate.local_filter_state);
}

// build the HT
auto &ht = *lstate.hash_table;
if (payload_columns.col_types.empty()) {
// there are only keys: place an empty chunk in the payload
if (payload_columns.col_types.empty()) { // there are only keys: place an empty chunk in the payload
lstate.payload_chunk.SetCardinality(chunk.size());
ht.Build(lstate.append_state, lstate.join_keys, lstate.payload_chunk);
} else {
// there are payload columns
} else { // there are payload columns
lstate.payload_chunk.ReferenceColumns(chunk, payload_columns.col_idxs);
ht.Build(lstate.append_state, lstate.join_keys, lstate.payload_chunk);
}

// build the HT
lstate.hash_table->Build(lstate.append_state, lstate.join_keys, lstate.payload_chunk);

return SinkResultType::NEED_MORE_INPUT;
}

Expand Down Expand Up @@ -816,21 +813,18 @@ OperatorResultType PhysicalHashJoin::ExecuteInternal(ExecutionContext &context,
D_ASSERT(sink.finalized);
D_ASSERT(!sink.scanned_data);

if (state.scan_structure.is_null || sink.perfect_join_executor) {
// place the lhs projected columns in the chunk
state.lhs_output.ReferenceColumns(input, lhs_output_columns.col_idxs);
}

if (sink.hash_table->Count() == 0) {
if (EmptyResultIfRHSIsEmpty()) {
return OperatorResultType::FINISHED;
}
state.lhs_output.ReferenceColumns(input, lhs_output_columns.col_idxs);
ConstructEmptyJoinResult(sink.hash_table->join_type, sink.hash_table->has_null, state.lhs_output, chunk);
return OperatorResultType::NEED_MORE_INPUT;
}

if (sink.perfect_join_executor) {
D_ASSERT(!sink.external);
state.lhs_output.ReferenceColumns(input, lhs_output_columns.col_idxs);
return sink.perfect_join_executor->ProbePerfectHashTable(context, input, state.lhs_output, chunk,
*state.perfect_hash_join_state);
}
Expand Down Expand Up @@ -858,6 +852,8 @@ OperatorResultType PhysicalHashJoin::ExecuteInternal(ExecutionContext &context,
sink.hash_table->Probe(state.scan_structure, state.lhs_join_keys, state.join_key_state, state.probe_state);
}
}

state.lhs_output.ReferenceColumns(input, lhs_output_columns.col_idxs);
state.scan_structure.Next(state.lhs_join_keys, state.lhs_output, chunk);

if (state.scan_structure.PointersExhausted() && chunk.size() == 0) {
Expand Down Expand Up @@ -1226,7 +1222,6 @@ void HashJoinLocalSourceState::ExternalProbe(HashJoinGlobalSinkState &sink, Hash
lhs_join_keys.Reset();
lhs_join_key_executor.Execute(lhs_probe_chunk, lhs_join_keys);
lhs_output.ReferenceColumns(lhs_probe_chunk, sink.op.lhs_output_columns.col_idxs);
auto precomputed_hashes = &lhs_probe_chunk.data.back();

if (sink.hash_table->Count() == 0 && !gstate.op.EmptyResultIfRHSIsEmpty()) {
gstate.op.ConstructEmptyJoinResult(sink.hash_table->join_type, sink.hash_table->has_null, lhs_output, chunk);
Expand All @@ -1235,6 +1230,7 @@ void HashJoinLocalSourceState::ExternalProbe(HashJoinGlobalSinkState &sink, Hash
}

// Perform the probe
auto precomputed_hashes = &lhs_probe_chunk.data.back();
sink.hash_table->Probe(scan_structure, lhs_join_keys, join_key_state, probe_state, precomputed_hashes);
scan_structure.Next(lhs_join_keys, lhs_output, chunk);
}
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "4-dev2447"
#define DUCKDB_PATCH_VERSION "4-dev2463"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 1
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.1.4-dev2447"
#define DUCKDB_VERSION "v1.1.4-dev2463"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "1ef91baeae"
#define DUCKDB_SOURCE_ID "5d48575b11"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down
52 changes: 25 additions & 27 deletions src/duckdb/src/include/duckdb/execution/ht_entry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@

namespace duckdb {

#if !defined(DISABLE_POINTER_SALT) && defined(__ANDROID__)
// Google, why does Android need 18446744 TB of address space?
#define DISABLE_POINTER_SALT
#endif

//! The ht_entry_t struct represents an individual entry within a hash table.
/*!
This struct is used by the JoinHashTable and AggregateHashTable to store entries within the hash table. It stores
Expand All @@ -21,31 +26,38 @@ namespace duckdb {
*/
struct ht_entry_t { // NOLINT
public:
//! Upper 16 bits are salt
#ifdef DISABLE_POINTER_SALT
//! No salt, all pointer
static constexpr const hash_t SALT_MASK = 0x0000000000000000;
static constexpr const hash_t POINTER_MASK = 0xFFFFFFFFFFFFFFFF;
#else
//! Upper 16 bits are salt, lower 48 bits are the pointer
static constexpr const hash_t SALT_MASK = 0xFFFF000000000000;
//! Lower 48 bits are the pointer
static constexpr const hash_t POINTER_MASK = 0x0000FFFFFFFFFFFF;
#endif

explicit inline ht_entry_t(hash_t value_p) noexcept : value(value_p) {
ht_entry_t() noexcept : value(0) {
}

// Add a default constructor for 32-bit linux test case
ht_entry_t() noexcept : value(0) {
explicit ht_entry_t(hash_t value_p) noexcept : value(value_p) {
}

inline bool IsOccupied() const {
return value != 0;
ht_entry_t(const hash_t &salt, const data_ptr_t &pointer)
: value(cast_pointer_to_uint64(pointer) | (salt & SALT_MASK)) {
}

// Returns a pointer based on the stored value without checking cell occupancy.
// This can return a nullptr if the cell is not occupied.
inline data_ptr_t GetPointerOrNull() const {
return cast_uint64_to_pointer(value & POINTER_MASK);
inline bool IsOccupied() const {
return value != 0;
}

// Returns a pointer based on the stored value if the cell is occupied
//! Returns a pointer based on the stored value (asserts that the cell is occupied)
inline data_ptr_t GetPointer() const {
D_ASSERT(IsOccupied());
return GetPointerOrNull();
}

//! Returns a pointer based on the stored value
inline data_ptr_t GetPointerOrNull() const {
return cast_uint64_to_pointer(value & POINTER_MASK);
}

Expand All @@ -59,15 +71,10 @@ struct ht_entry_t { // NOLINT
}

// Returns the salt, leaves upper salt bits intact, sets lower bits to all 1's
static inline hash_t ExtractSalt(hash_t hash) {
static inline hash_t ExtractSalt(const hash_t &hash) {
return hash | POINTER_MASK;
}

// Returns the salt, leaves upper salt bits intact, sets lower bits to all 0's
static inline hash_t ExtractSaltWithNulls(hash_t hash) {
return hash & SALT_MASK;
}

inline hash_t GetSalt() const {
return ExtractSalt(value);
}
Expand All @@ -81,15 +88,6 @@ struct ht_entry_t { // NOLINT
value = salt;
}

static inline ht_entry_t GetDesiredEntry(const data_ptr_t &pointer, const hash_t &salt) {
auto desired = cast_pointer_to_uint64(pointer) | (salt & SALT_MASK);
return ht_entry_t(desired);
}

static inline ht_entry_t GetEmptyEntry() {
return ht_entry_t(0);
}

private:
hash_t value;
};
Expand Down
8 changes: 3 additions & 5 deletions src/duckdb/src/include/duckdb/execution/join_hashtable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,22 +137,20 @@ class JoinHashTable {

public:
struct SharedState {

SharedState();

// The ptrs to the rows into which a key should be inserted during building
// or matched against during probing
Vector rhs_row_locations;
Vector salt_v;

SelectionVector salt_match_sel;
SelectionVector key_no_match_sel;
};

struct ProbeState : SharedState {

ProbeState();

Vector salt_v;
Vector ht_offsets_v;
Vector ht_offsets_dense_v;

Expand Down Expand Up @@ -419,8 +417,8 @@ class JoinHashTable {
//! Build HT for the next partitioned probe round
bool PrepareExternalFinalize(const idx_t max_ht_size);
//! Probe whatever we can, sink the rest into a thread-local HT
void ProbeAndSpill(ScanStructure &scan_structure, DataChunk &keys, TupleDataChunkState &key_state,
ProbeState &probe_state, DataChunk &payload, ProbeSpill &probe_spill,
void ProbeAndSpill(ScanStructure &scan_structure, DataChunk &probe_keys, TupleDataChunkState &key_state,
ProbeState &probe_state, DataChunk &probe_chunk, ProbeSpill &probe_spill,
ProbeSpillLocalAppendState &spill_state, DataChunk &spill_chunk);

private:
Expand Down