From 558161498aca5db29bb14c383eb7b050d9be06c8 Mon Sep 17 00:00:00 2001
From: nemonlou
Date: Mon, 19 Aug 2024 11:34:49 +0800
Subject: [PATCH] [CH]duplicate column name case support in broadcast join #6926

---
 .../GlutenClickhouseFunctionSuite.scala       | 20 ++++++++++++++++
 .../Join/BroadCastJoinBuilder.cpp             | 23 +++++++++++++++----
 2 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
index 1d4d1b6f8afb..2f7c409374b1 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
@@ -254,4 +254,24 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {
     }
   }
 
+  test("duplicate column name issue") {
+    withTable("left_table", "right_table") {
+      sql("create table left_table(id int, name string) using orc")
+      sql("create table right_table(id int, book string) using orc")
+      sql("insert into left_table values (1,'a'),(2,'b'),(3,'c'),(4,'d')")
+      sql("insert into right_table values (1,'a'),(1,'b'),(2,'c'),(2,'d')")
+      compareResultsAgainstVanillaSpark(
+        """
+          |select p1.id, p1.name, p2.book
+          | from left_table p1 left join
+          | (select id, id, book
+          | from right_table where id <= 2) p2
+          | on p1.id=p2.id
+          |""".stripMargin,
+        true,
+        { _ => }
+      )
+    }
+  }
+
 }
diff --git a/cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp b/cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp
index f47f423df89b..da301dcb89f8 100644
--- a/cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp
+++ b/cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp
@@ -57,13 +57,26 @@ jlong callJavaGet(const std::string & id)
 DB::Block resetBuildTableBlockName(Block & block, bool only_one = false)
 {
     DB::ColumnsWithTypeAndName new_cols;
+    std::set<std::string> names;
+    int32_t seq = 0;
     for (const auto & col : block)
     {
-        // Add a prefix to avoid column name conflicts with left table.
-        new_cols.emplace_back(col.column, col.type, BlockUtil::RIHGT_COLUMN_PREFIX + col.name);
-
-        if (only_one)
-            break;
+        // Add a prefix to avoid column name conflicts with left table.
+        std::stringstream new_name;
+        // add a sequence to avoid duplicate name in some rare cases
+        if (names.find(col.name) == names.end())
+        {
+            new_name << BlockUtil::RIHGT_COLUMN_PREFIX << col.name;
+            names.insert(col.name);
+        }
+        else
+        {
+            new_name << BlockUtil::RIHGT_COLUMN_PREFIX << (seq++) << "_" << col.name;
+        }
+        new_cols.emplace_back(col.column, col.type, new_name.str());
+
+        if (only_one)
+            break;
     }
     return DB::Block(new_cols);
 }