Skip to content

Commit

Permalink
Merge pull request #122 from samansmink/improve-table-function-to-string
Browse files Browse the repository at this point in the history
Improve table function to string
  • Loading branch information
samansmink authored Nov 20, 2024
2 parents a2364a1 + 35e9d52 commit b77a9da
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 24 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/MainDistributionPipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ jobs:
name: Build extension binaries
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
with:
# pip install duckdb==1.1.4.dev1594
duckdb_version: 0ccf3c25cc
# pip install duckdb==1.1.4.dev2005
duckdb_version: b470dea7ee
ci_tools_version: main
extension_name: delta
enable_rust: true
Expand All @@ -33,5 +33,5 @@ jobs:
with:
extension_name: delta
ci_tools_version: main
duckdb_version: 0ccf3c25cc
duckdb_version: b470dea7ee
exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw'
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 417 files
38 changes: 23 additions & 15 deletions src/functions/delta_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ unique_ptr<MultiFileList> DeltaSnapshot::ComplexFilterPushdown(ClientContext &co
for (const auto &filter : filters) {
combiner.AddFilter(filter->Copy());
}
auto filterstmp = combiner.GenerateTableScanFilters(info.column_ids);
auto filterstmp = combiner.GenerateTableScanFilters(info.column_indexes);

// TODO: can/should we figure out if this filtered anything?
auto filtered_list = make_uniq<DeltaSnapshot>(context, paths[0]);
Expand Down Expand Up @@ -643,9 +643,9 @@ void DeltaMultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFil
void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options,
const MultiFileReaderBindData &options, const string &filename,
const vector<string> &local_names, const vector<LogicalType> &global_types,
const vector<string> &global_names, const vector<column_t> &global_column_ids,
MultiFileReaderData &reader_data, ClientContext &context,
optional_ptr<MultiFileReaderGlobalState> global_state) {
const vector<string> &global_names,
const vector<ColumnIndex> &global_column_ids, MultiFileReaderData &reader_data,
ClientContext &context, optional_ptr<MultiFileReaderGlobalState> global_state) {
MultiFileReader::FinalizeBind(file_options, options, filename, local_names, global_types, global_names,
global_column_ids, reader_data, context, global_state);

Expand All @@ -671,7 +671,7 @@ void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_optio

if (!file_metadata->partition_map.empty()) {
for (idx_t i = 0; i < global_column_ids.size(); i++) {
column_t col_id = global_column_ids[i];
column_t col_id = global_column_ids[i].GetPrimaryIndex();
if (IsRowIdColumnId(col_id)) {
continue;
}
Expand Down Expand Up @@ -749,14 +749,14 @@ unique_ptr<MultiFileReaderGlobalState> DeltaMultiFileReader::InitializeGlobalSta
duckdb::ClientContext &context, const duckdb::MultiFileReaderOptions &file_options,
const duckdb::MultiFileReaderBindData &bind_data, const duckdb::MultiFileList &file_list,
const vector<duckdb::LogicalType> &global_types, const vector<std::string> &global_names,
const vector<duckdb::column_t> &global_column_ids) {
const vector<ColumnIndex> &global_column_ids) {
vector<LogicalType> extra_columns;
vector<pair<string, idx_t>> mapped_columns;

// Create a map of the columns that are in the projection
case_insensitive_map_t<idx_t> selected_columns;
for (idx_t i = 0; i < global_column_ids.size(); i++) {
auto global_id = global_column_ids[i];
auto global_id = global_column_ids[i].GetPrimaryIndex();
if (IsRowIdColumnId(global_id)) {
continue;
}
Expand Down Expand Up @@ -815,7 +815,7 @@ unique_ptr<MultiFileReaderGlobalState> DeltaMultiFileReader::InitializeGlobalSta
// in the parquet files, we just add null constant columns
static void CustomMulfiFileNameMapping(const string &file_name, const vector<LogicalType> &local_types,
const vector<string> &local_names, const vector<LogicalType> &global_types,
const vector<string> &global_names, const vector<column_t> &global_column_ids,
const vector<string> &global_names, const vector<ColumnIndex> &global_column_ids,
MultiFileReaderData &reader_data, const string &initial_file,
optional_ptr<MultiFileReaderGlobalState> global_state) {
D_ASSERT(global_types.size() == global_names.size());
Expand All @@ -839,7 +839,7 @@ static void CustomMulfiFileNameMapping(const string &file_name, const vector<Log
continue;
}
// not constant - look up the column in the name map
auto global_id = global_column_ids[i];
auto global_id = global_column_ids[i].GetPrimaryIndex();
if (global_id >= global_types.size()) {
throw InternalException(
"MultiFileReader::CreatePositionalMapping - global_id is out of range in global_types for this file");
Expand Down Expand Up @@ -880,7 +880,7 @@ static void CustomMulfiFileNameMapping(const string &file_name, const vector<Log
void DeltaMultiFileReader::CreateNameMapping(const string &file_name, const vector<LogicalType> &local_types,
const vector<string> &local_names, const vector<LogicalType> &global_types,
const vector<string> &global_names,
const vector<column_t> &global_column_ids,
const vector<ColumnIndex> &global_column_ids,
MultiFileReaderData &reader_data, const string &initial_file,
optional_ptr<MultiFileReaderGlobalState> global_state) {
// First call the base implementation to do most mapping
Expand Down Expand Up @@ -980,11 +980,17 @@ bool DeltaMultiFileReader::ParseOption(const string &key, const Value &val, Mult

return MultiFileReader::ParseOption(key, val, options, context);
}
//
// DeltaMultiFileReaderBindData::DeltaMultiFileReaderBindData(DeltaSnapshot & delta_snapshot):
// current_snapshot(delta_snapshot){
//
//}

static InsertionOrderPreservingMap<string> DeltaFunctionToString(TableFunctionToStringInput &input) {
InsertionOrderPreservingMap<string> result;

if (input.table_function.function_info) {
auto &table_info = input.table_function.function_info->Cast<DeltaFunctionInfo>();
result["Table"] = table_info.table_name;
}

return result;
}

TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance) {
// Parquet extension needs to be loaded for this to make sense
Expand All @@ -1007,6 +1013,8 @@ TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance
function.table_scan_progress = nullptr;
function.get_bind_info = nullptr;

function.to_string = DeltaFunctionToString;

// Schema param is just confusing here
function.named_parameters.erase("schema");

Expand Down
7 changes: 4 additions & 3 deletions src/include/functions/delta_scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ struct DeltaSnapshot;
struct DeltaFunctionInfo : public TableFunctionInfo {
shared_ptr<DeltaSnapshot> snapshot;
string expected_path;
string table_name;
};

struct DeltaFileMetaData {
Expand Down Expand Up @@ -132,20 +133,20 @@ struct DeltaMultiFileReader : public MultiFileReader {

void CreateNameMapping(const string &file_name, const vector<LogicalType> &local_types,
const vector<string> &local_names, const vector<LogicalType> &global_types,
const vector<string> &global_names, const vector<column_t> &global_column_ids,
const vector<string> &global_names, const vector<ColumnIndex> &global_column_ids,
MultiFileReaderData &reader_data, const string &initial_file,
optional_ptr<MultiFileReaderGlobalState> global_state) override;

unique_ptr<MultiFileReaderGlobalState>
InitializeGlobalState(ClientContext &context, const MultiFileReaderOptions &file_options,
const MultiFileReaderBindData &bind_data, const MultiFileList &file_list,
const vector<LogicalType> &global_types, const vector<string> &global_names,
const vector<column_t> &global_column_ids) override;
const vector<ColumnIndex> &global_column_ids) override;

void FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options,
const string &filename, const vector<string> &local_names,
const vector<LogicalType> &global_types, const vector<string> &global_names,
const vector<column_t> &global_column_ids, MultiFileReaderData &reader_data,
const vector<ColumnIndex> &global_column_ids, MultiFileReaderData &reader_data,
ClientContext &context, optional_ptr<MultiFileReaderGlobalState> global_state) override;

//! Override the FinalizeChunk method
Expand Down
1 change: 1 addition & 0 deletions src/storage/delta_table_entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ TableFunction DeltaTableEntry::GetScanFunction(ClientContext &context, unique_pt
auto function_info = make_shared_ptr<DeltaFunctionInfo>();

function_info->snapshot = this->snapshot;
function_info->table_name = delta_catalog.GetName();
delta_scan_function.function_info = std::move(function_info);

vector<Value> inputs = {delta_catalog.GetDBPath()};
Expand Down
8 changes: 7 additions & 1 deletion test/sql/dat/attach.test
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,10 @@ select utf8 from dt
1
2
3
4
4

# Test that the explain output contains the table name
query II
explain from dt
----
physical_plan <REGEX>:.*Table: dt.*

0 comments on commit b77a9da

Please sign in to comment.