Skip to content

Commit

Permalink
Update vendored DuckDB sources to d7c8414
Browse files Browse the repository at this point in the history
  • Loading branch information
duckdblabs-bot committed Dec 11, 2024
1 parent d7c8414 commit fe39c1a
Show file tree
Hide file tree
Showing 43 changed files with 611 additions and 374 deletions.
18 changes: 12 additions & 6 deletions src/duckdb/extension/core_functions/aggregate/holistic/mode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ struct ModeState {
//! The collection being read
const ColumnDataCollection *inputs;
//! The state used for reading the collection on this thread
ColumnDataScanState scan;
ColumnDataScanState *scan = nullptr;
//! The data chunk paged into
DataChunk page;
//! The data pointer
Expand All @@ -93,31 +93,37 @@ struct ModeState {
if (mode) {
delete mode;
}
if (scan) {
delete scan;
}
}

void InitializePage(const WindowPartitionInput &partition) {
if (!scan) {
scan = new ColumnDataScanState();
}
if (page.ColumnCount() == 0) {
D_ASSERT(partition.inputs);
inputs = partition.inputs;
D_ASSERT(partition.column_ids.size() == 1);
inputs->InitializeScan(scan, partition.column_ids);
inputs->InitializeScanChunk(scan, page);
inputs->InitializeScan(*scan, partition.column_ids);
inputs->InitializeScanChunk(*scan, page);
}
}

inline sel_t RowOffset(idx_t row_idx) const {
D_ASSERT(RowIsVisible(row_idx));
return UnsafeNumericCast<sel_t>(row_idx - scan.current_row_index);
return UnsafeNumericCast<sel_t>(row_idx - scan->current_row_index);
}

inline bool RowIsVisible(idx_t row_idx) const {
return (row_idx < scan.next_row_index && scan.current_row_index <= row_idx);
return (row_idx < scan->next_row_index && scan->current_row_index <= row_idx);
}

inline idx_t Seek(idx_t row_idx) {
if (!RowIsVisible(row_idx)) {
D_ASSERT(inputs);
inputs->Seek(row_idx, scan, page);
inputs->Seek(row_idx, *scan, page);
data = FlatVector::GetData<KEY_TYPE>(page.data[0]);
validity = &FlatVector::Validity(page.data[0]);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,9 @@ static AggregateFunction GetHistogramBinFunction(const LogicalType &type) {

template <class HIST>
AggregateFunction GetHistogramBinFunction(const LogicalType &type) {
if (type.id() == LogicalTypeId::DECIMAL) {
return GetHistogramBinFunction<HIST>(LogicalType::DOUBLE);
}
switch (type.InternalType()) {
#ifndef DUCKDB_SMALLER_BINARY
case PhysicalType::BOOL:
Expand Down
6 changes: 4 additions & 2 deletions src/duckdb/extension/parquet/column_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2119,8 +2119,9 @@ void ArrayColumnWriter::Write(ColumnWriterState &state_p, Vector &vector, idx_t
struct double_na_equal {
double_na_equal() : val(0) {
}
double_na_equal(const double val_p) : val(val_p) {
explicit double_na_equal(const double val_p) : val(val_p) {
}
// NOLINTNEXTLINE: allow implicit conversion to double
operator double() const {
return val;
}
Expand All @@ -2137,8 +2138,9 @@ struct double_na_equal {
struct float_na_equal {
float_na_equal() : val(0) {
}
float_na_equal(const float val_p) : val(val_p) {
explicit float_na_equal(const float val_p) : val(val_p) {
}
// NOLINTNEXTLINE: allow implicit conversion to float
operator float() const {
return val;
}
Expand Down
2 changes: 1 addition & 1 deletion src/duckdb/extension/parquet/parquet_metadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,7 @@ void ParquetMetaDataOperatorData::ExecuteBloomProbe(ClientContext &context, cons
auto bloom_excludes =
ParquetStatisticsUtils::BloomFilterExcludes(filter, column.meta_data, *protocol, allocator);
current_chunk.SetValue(0, count, Value(file_path));
current_chunk.SetValue(1, count, Value::BIGINT(row_group_idx));
current_chunk.SetValue(1, count, Value::BIGINT(NumericCast<int64_t>(row_group_idx)));
current_chunk.SetValue(2, count, Value::BOOLEAN(bloom_excludes));

count++;
Expand Down
10 changes: 5 additions & 5 deletions src/duckdb/extension/parquet/parquet_statistics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ ParquetBloomFilter::ParquetBloomFilter(idx_t num_entries, double bloom_filter_fa
// see http://tfk.mit.edu/pdf/bloom.pdf
double f = bloom_filter_false_positive_ratio;
double k = 8.0;
double n = num_entries;
double n = LossyNumericCast<double>(num_entries);
double m = -k * n / std::log(1 - std::pow(f, 1 / k));
auto b = MaxValue<idx_t>(NextPowerOfTwo(m / k) / 32, 1);

Expand All @@ -573,14 +573,14 @@ ParquetBloomFilter::ParquetBloomFilter(unique_ptr<ResizeableBuffer> data_p) {
}

void ParquetBloomFilter::FilterInsert(uint64_t x) {
auto blocks = (ParquetBloomBlock *)(data->ptr);
auto blocks = reinterpret_cast<ParquetBloomBlock *>(data->ptr);
uint64_t i = ((x >> 32) * block_count) >> 32;
auto &b = blocks[i];
ParquetBloomBlock::BlockInsert(b, x);
}

bool ParquetBloomFilter::FilterCheck(uint64_t x) {
auto blocks = (ParquetBloomBlock *)(data->ptr);
auto blocks = reinterpret_cast<ParquetBloomBlock *>(data->ptr);
auto i = ((x >> 32) * block_count) >> 32;
return ParquetBloomBlock::BlockCheck(blocks[i], x);
}
Expand All @@ -595,12 +595,12 @@ static uint8_t PopCnt64(uint64_t n) {
}

double ParquetBloomFilter::OneRatio() {
auto bloom_ptr = (uint64_t *)data->ptr;
auto bloom_ptr = reinterpret_cast<uint64_t *>(data->ptr);
idx_t one_count = 0;
for (idx_t b_idx = 0; b_idx < data->len / sizeof(uint64_t); ++b_idx) {
one_count += PopCnt64(bloom_ptr[b_idx]);
}
return one_count / (data->len * 8.0);
return LossyNumericCast<double>(one_count) / (data->len * 8.0);
}

ResizeableBuffer *ParquetBloomFilter::Get() {
Expand Down
7 changes: 4 additions & 3 deletions src/duckdb/extension/parquet/parquet_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ void ParquetWriter::Finalize() {
// write nonsense bloom filter header
duckdb_parquet::BloomFilterHeader filter_header;
auto bloom_filter_bytes = bloom_filter_entry.bloom_filter->Get();
filter_header.numBytes = bloom_filter_bytes->len;
filter_header.numBytes = NumericCast<int32_t>(bloom_filter_bytes->len);
filter_header.algorithm.__set_BLOCK(duckdb_parquet::SplitBlockAlgorithm());
filter_header.compression.__set_UNCOMPRESSED(duckdb_parquet::Uncompressed());
filter_header.hash.__set_XXHASH(duckdb_parquet::XxHash());
Expand All @@ -544,14 +544,15 @@ void ParquetWriter::Finalize() {
file_meta_data.row_groups[bloom_filter_entry.row_group_idx].columns[bloom_filter_entry.column_idx];

column_chunk.meta_data.__isset.bloom_filter_offset = true;
column_chunk.meta_data.bloom_filter_offset = writer->GetTotalWritten();
column_chunk.meta_data.bloom_filter_offset = NumericCast<int64_t>(writer->GetTotalWritten());

auto bloom_filter_header_size = Write(filter_header);
// write actual data
WriteData(bloom_filter_bytes->ptr, bloom_filter_bytes->len);

column_chunk.meta_data.__isset.bloom_filter_length = true;
column_chunk.meta_data.bloom_filter_length = bloom_filter_header_size + bloom_filter_bytes->len;
column_chunk.meta_data.bloom_filter_length =
NumericCast<int32_t>(bloom_filter_header_size + bloom_filter_bytes->len);
}

const auto metadata_start_offset = writer->GetTotalWritten();
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/common/fsst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ string_t FSSTPrimitives::DecompressValue(void *duckdb_fsst_decoder, Vector &resu
decompressed_string_size);
}

Value FSSTPrimitives::DecompressValue(void *duckdb_fsst_decoder, const char *compressed_string,
const idx_t compressed_string_len, vector<unsigned char> &decompress_buffer) {
string FSSTPrimitives::DecompressValue(void *duckdb_fsst_decoder, const char *compressed_string,
const idx_t compressed_string_len, vector<unsigned char> &decompress_buffer) {

auto compressed_string_ptr = (unsigned char *)compressed_string; // NOLINT
auto fsst_decoder = reinterpret_cast<duckdb_fsst_decoder_t *>(duckdb_fsst_decoder);
Expand All @@ -30,7 +30,7 @@ Value FSSTPrimitives::DecompressValue(void *duckdb_fsst_decoder, const char *com

D_ASSERT(!decompress_buffer.empty());
D_ASSERT(decompressed_string_size <= decompress_buffer.size() - 1);
return Value(string(char_ptr_cast(decompress_buffer.data()), decompressed_string_size));
return string(char_ptr_cast(decompress_buffer.data()), decompressed_string_size);
}

} // namespace duckdb
2 changes: 1 addition & 1 deletion src/duckdb/src/common/operator/cast_operators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2298,7 +2298,7 @@ bool DoubleToDecimalCast(SRC input, DST &result, CastParameters &parameters, uin
double roundedValue = round(value);
if (roundedValue <= -NumericHelper::DOUBLE_POWERS_OF_TEN[width] ||
roundedValue >= NumericHelper::DOUBLE_POWERS_OF_TEN[width]) {
string error = StringUtil::Format("Could not cast value %f to DECIMAL(%d,%d)", value, width, scale);
string error = StringUtil::Format("Could not cast value %f to DECIMAL(%d,%d)", input, width, scale);
HandleCastError::AssignError(error, parameters);
return false;
}
Expand Down
2 changes: 2 additions & 0 deletions src/duckdb/src/common/types/row/tuple_data_collection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ void TupleDataCollection::InitializeChunkState(TupleDataChunkState &chunk_state,
}
InitializeVectorFormat(chunk_state.vector_data, types);

chunk_state.cached_cast_vectors.clear();
chunk_state.cached_cast_vector_cache.clear();
for (auto &col : column_ids) {
auto &type = types[col];
if (TypeVisitor::Contains(type, LogicalTypeId::ARRAY)) {
Expand Down
13 changes: 10 additions & 3 deletions src/duckdb/src/common/types/vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -601,9 +601,16 @@ Value Vector::GetValueInternal(const Vector &v_p, idx_t index_p) {
auto str_compressed = reinterpret_cast<string_t *>(data)[index];
auto decoder = FSSTVector::GetDecoder(*vector);
auto &decompress_buffer = FSSTVector::GetDecompressBuffer(*vector);
Value result = FSSTPrimitives::DecompressValue(decoder, str_compressed.GetData(), str_compressed.GetSize(),
decompress_buffer);
return result;
auto string_val = FSSTPrimitives::DecompressValue(decoder, str_compressed.GetData(), str_compressed.GetSize(),
decompress_buffer);
switch (vector->GetType().id()) {
case LogicalTypeId::VARCHAR:
return Value(std::move(string_val));
case LogicalTypeId::BLOB:
return Value::BLOB_RAW(string_val);
default:
throw InternalException("Unsupported vector type for FSST vector");
}
}

switch (vector->GetType().id()) {
Expand Down
Loading

0 comments on commit fe39c1a

Please sign in to comment.