diff --git a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralToSparkSqlCallConverter.java b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralToSparkSqlCallConverter.java index 5170507c8..bebd38e94 100644 --- a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralToSparkSqlCallConverter.java +++ b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralToSparkSqlCallConverter.java @@ -15,8 +15,8 @@ import com.linkedin.coral.common.transformers.OperatorRenameSqlCallTransformer; import com.linkedin.coral.common.transformers.SqlCallTransformers; import com.linkedin.coral.spark.containers.SparkUDFInfo; -import com.linkedin.coral.spark.transformers.FallBackToLinkedInHiveUDFTransformer; import com.linkedin.coral.spark.transformers.FuzzyUnionGenericProjectTransformer; +import com.linkedin.coral.spark.transformers.HiveUDFTransformer; import com.linkedin.coral.spark.transformers.TransportUDFTransformer; import static com.linkedin.coral.spark.transformers.TransportUDFTransformer.*; @@ -27,7 +27,7 @@ * which containing a list of {@link com.linkedin.coral.common.transformers.SqlCallTransformer} to traverse the hierarchy of a {@link org.apache.calcite.sql.SqlCall} * and converts the functions from Coral operator to Spark operator if it is required * - * In this converter, we need to apply {@link TransportUDFTransformer} before {@link FallBackToLinkedInHiveUDFTransformer} + * In this converter, we need to apply {@link TransportUDFTransformer} before {@link HiveUDFTransformer} * because we should try to transform a UDF to an equivalent Transport UDF before falling back to LinkedIn Hive UDF. */ public class CoralToSparkSqlCallConverter extends SqlShuttle { @@ -154,7 +154,7 @@ public CoralToSparkSqlCallConverter(Set sparkUDFInfos) { new OperatorRenameSqlCallTransformer(SqlStdOperatorTable.CARDINALITY, 1, "size"), // Fall back to the original Hive UDF defined in StaticHiveFunctionRegistry after failing to apply transformers above - new FallBackToLinkedInHiveUDFTransformer(sparkUDFInfos), + new HiveUDFTransformer(sparkUDFInfos), // Transform `generic_project` function new FuzzyUnionGenericProjectTransformer(sparkUDFInfos)); diff --git a/coral-spark/src/main/java/com/linkedin/coral/spark/transformers/FallBackToLinkedInHiveUDFTransformer.java b/coral-spark/src/main/java/com/linkedin/coral/spark/transformers/HiveUDFTransformer.java similarity index 81% rename from coral-spark/src/main/java/com/linkedin/coral/spark/transformers/FallBackToLinkedInHiveUDFTransformer.java rename to coral-spark/src/main/java/com/linkedin/coral/spark/transformers/HiveUDFTransformer.java index a727ca37c..38406f942 100644 --- a/coral-spark/src/main/java/com/linkedin/coral/spark/transformers/FallBackToLinkedInHiveUDFTransformer.java +++ b/coral-spark/src/main/java/com/linkedin/coral/spark/transformers/HiveUDFTransformer.java @@ -1,5 +1,5 @@ /** - * Copyright 2023 LinkedIn Corporation. All rights reserved. + * Copyright 2023-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -23,14 +23,12 @@ /** - * After failing to transform UDF with {@link TransportUDFTransformer}, - * we use this transformer to fall back to the original Hive UDF defined in - * {@link com.linkedin.coral.hive.hive2rel.functions.StaticHiveFunctionRegistry}. - * This is reasonable since Spark understands and has ability to run Hive UDF. - * Check `CoralSparkTest#testFallBackToLinkedInHiveUDFTransformer()` for an example. + * This transformer converts the Hive UDF SqlCall name from the UDF class name (e.g., `com.linkedin.HiveUDF`) + * to the corresponding view-dependent UDF name in the view text. It also adds the UDF information to `sparkUDFInfos`. + * Refer to `CoralSparkTest#testHiveUDFTransformer()` for an example. */ -public class FallBackToLinkedInHiveUDFTransformer extends SqlCallTransformer { - private static final Logger LOG = LoggerFactory.getLogger(FallBackToLinkedInHiveUDFTransformer.class); +public class HiveUDFTransformer extends SqlCallTransformer { + private static final Logger LOG = LoggerFactory.getLogger(HiveUDFTransformer.class); /** * Some LinkedIn UDFs get registered correctly in a SparkSession, and hence a DataFrame is successfully @@ -46,7 +44,7 @@ public class FallBackToLinkedInHiveUDFTransformer extends SqlCallTransformer { "com.linkedin.coral.hive.hive2rel.CoralTestUnsupportedUDF"); private final Set sparkUDFInfos; - public FallBackToLinkedInHiveUDFTransformer(Set sparkUDFInfos) { + public HiveUDFTransformer(Set sparkUDFInfos) { this.sparkUDFInfos = sparkUDFInfos; } diff --git a/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java b/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java index 454ea0ac7..927b499bd 100644 --- a/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java +++ b/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java @@ -137,7 +137,7 @@ public void testTransportUDFTransformer() { } @Test - public void testFallBackToLinkedInHiveUDFTransformer() { + public void testHiveUDFTransformer() { // Dali view foo_dali_udf2 contains a UDF not defined with OperatorBasedSqlCallTransformer or TransportUDFTransformer. // We need to fall back to the udf initially defined in HiveFunctionRegistry. // Then the function Name comes from Hive metastore in the format dbName_viewName_funcBaseName.