Skip to content

Commit

Permalink
[CH]duplicate column name case support in broadcast join #6926 (#6927)
Browse files Browse the repository at this point in the history
What changes were proposed in this pull request?
Fixes: #6926

How was this patch tested?
By unit tests (UT).
  • Loading branch information
loudongfeng authored Aug 20, 2024
1 parent c3e9677 commit 221f0f8
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -198,4 +198,24 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {
}
}

// Regression test: the right side of a left join is a subquery that projects
// the same column twice (`select id, id, book`). The query is handed to
// compareResultsAgainstVanillaSpark (presumably checking the result against
// vanilla Spark, as the helper's name suggests).
test("duplicate column name issue") {
  withTable("left_table", "right_table") {
    // Create and populate both tables; the statements run in the listed order.
    Seq(
      "create table left_table(id int, name string) using orc",
      "create table right_table(id int, book string) using orc",
      "insert into left_table values (1,'a'),(2,'b'),(3,'c'),(4,'d')",
      "insert into right_table values (1,'a'),(1,'b'),(2,'c'),(2,'d')"
    ).foreach(statement => sql(statement))

    val joinWithDuplicatedColumn =
      """
        |select p1.id, p1.name, p2.book
        | from left_table p1 left join
        | (select id, id, book
        | from right_table where id <= 2) p2
        | on p1.id=p2.id
        |""".stripMargin

    // The trailing callback is a no-op: nothing extra is checked on the result.
    compareResultsAgainstVanillaSpark(joinWithDuplicatedColumn, true, { _ => })
  }
}

}
23 changes: 18 additions & 5 deletions cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,26 @@ jlong callJavaGet(const std::string & id)
/// Builds a copy of `block` whose columns are renamed with the build-side
/// (right table) prefix.
///
/// Every column keeps its data and type; only its name changes to
/// BlockUtil::RIHGT_COLUMN_PREFIX + <original name>, so that it does not
/// conflict with a column of the left table. If that prefixed name was already
/// handed out to an earlier column (e.g. `select id, id, book ...`), a numeric
/// tag is inserted after the prefix instead: <prefix><seq>_<original name>.
/// The tag is advanced until the resulting name is unused, so all names in the
/// returned block are distinct.
///
/// @param block     Source block; it is only iterated, never modified.
/// @param only_one  When true, only the first column is renamed and returned.
/// @return A new DB::Block built from the renamed columns.
DB::Block resetBuildTableBlockName(Block & block, bool only_one = false)
{
    DB::ColumnsWithTypeAndName new_cols;
    // Names already emitted into new_cols; used to guarantee uniqueness.
    std::set<std::string> used_names;
    int32_t seq = 0;
    for (const auto & col : block)
    {
        // Add a prefix to avoid column name conflicts with left table.
        // ("RIHGT" is the spelling of the constant as it is referenced here.)
        std::string new_name = BlockUtil::RIHGT_COLUMN_PREFIX + col.name;

        // insert() reports whether the name was new. On a clash, add a sequence
        // tag and retry until the name is unique.
        while (!used_names.insert(new_name).second)
            new_name = BlockUtil::RIHGT_COLUMN_PREFIX + std::to_string(seq++) + "_" + col.name;

        new_cols.emplace_back(col.column, col.type, new_name);

        if (only_one)
            break;
    }
    return DB::Block(new_cols);
}
Expand Down

0 comments on commit 221f0f8

Please sign in to comment.