Skip to content

Commit

Permalink
Rename FallBackToLinkedInHiveUDFTransformer to HiveUDFTransformer (
Browse files Browse the repository at this point in the history
…linkedin#522)

* Rename FallBackToLinkedInHiveUDFTransformer to HiveUDFTransformer

* Spotless Apply

* Modify java doc
  • Loading branch information
ljfgem authored Aug 1, 2024
1 parent 8786af3 commit aa086d1
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
import com.linkedin.coral.common.transformers.OperatorRenameSqlCallTransformer;
import com.linkedin.coral.common.transformers.SqlCallTransformers;
import com.linkedin.coral.spark.containers.SparkUDFInfo;
import com.linkedin.coral.spark.transformers.FallBackToLinkedInHiveUDFTransformer;
import com.linkedin.coral.spark.transformers.FuzzyUnionGenericProjectTransformer;
import com.linkedin.coral.spark.transformers.HiveUDFTransformer;
import com.linkedin.coral.spark.transformers.TransportUDFTransformer;

import static com.linkedin.coral.spark.transformers.TransportUDFTransformer.*;
Expand All @@ -27,7 +27,7 @@
 * which contains a list of {@link com.linkedin.coral.common.transformers.SqlCallTransformer} to traverse the hierarchy of a {@link org.apache.calcite.sql.SqlCall}
 * and converts the functions from Coral operators to Spark operators where required
*
* In this converter, we need to apply {@link TransportUDFTransformer} before {@link FallBackToLinkedInHiveUDFTransformer}
* In this converter, we need to apply {@link TransportUDFTransformer} before {@link HiveUDFTransformer}
* because we should try to transform a UDF to an equivalent Transport UDF before falling back to LinkedIn Hive UDF.
*/
public class CoralToSparkSqlCallConverter extends SqlShuttle {
Expand Down Expand Up @@ -154,7 +154,7 @@ public CoralToSparkSqlCallConverter(Set<SparkUDFInfo> sparkUDFInfos) {
new OperatorRenameSqlCallTransformer(SqlStdOperatorTable.CARDINALITY, 1, "size"),

// Fall back to the original Hive UDF defined in StaticHiveFunctionRegistry after failing to apply transformers above
new FallBackToLinkedInHiveUDFTransformer(sparkUDFInfos),
new HiveUDFTransformer(sparkUDFInfos),

// Transform `generic_project` function
new FuzzyUnionGenericProjectTransformer(sparkUDFInfos));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2023 LinkedIn Corporation. All rights reserved.
* Copyright 2023-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand All @@ -23,14 +23,12 @@


/**
* After failing to transform UDF with {@link TransportUDFTransformer},
* we use this transformer to fall back to the original Hive UDF defined in
* {@link com.linkedin.coral.hive.hive2rel.functions.StaticHiveFunctionRegistry}.
* This is reasonable since Spark understands and has ability to run Hive UDF.
* Check `CoralSparkTest#testFallBackToLinkedInHiveUDFTransformer()` for an example.
* This transformer converts the Hive UDF SqlCall name from the UDF class name (e.g., `com.linkedin.HiveUDF`)
* to the corresponding view-dependent UDF name in the view text. It also adds the UDF information to `sparkUDFInfos`.
* Refer to `CoralSparkTest#testHiveUDFTransformer()` for an example.
*/
public class FallBackToLinkedInHiveUDFTransformer extends SqlCallTransformer {
private static final Logger LOG = LoggerFactory.getLogger(FallBackToLinkedInHiveUDFTransformer.class);
public class HiveUDFTransformer extends SqlCallTransformer {
private static final Logger LOG = LoggerFactory.getLogger(HiveUDFTransformer.class);

/**
* Some LinkedIn UDFs get registered correctly in a SparkSession, and hence a DataFrame is successfully
Expand All @@ -46,7 +44,7 @@ public class FallBackToLinkedInHiveUDFTransformer extends SqlCallTransformer {
"com.linkedin.coral.hive.hive2rel.CoralTestUnsupportedUDF");
private final Set<SparkUDFInfo> sparkUDFInfos;

public FallBackToLinkedInHiveUDFTransformer(Set<SparkUDFInfo> sparkUDFInfos) {
public HiveUDFTransformer(Set<SparkUDFInfo> sparkUDFInfos) {
this.sparkUDFInfos = sparkUDFInfos;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ public void testTransportUDFTransformer() {
}

@Test
public void testFallBackToLinkedInHiveUDFTransformer() {
public void testHiveUDFTransformer() {
// Dali view foo_dali_udf2 contains a UDF not defined with OperatorBasedSqlCallTransformer or TransportUDFTransformer.
// We need to fall back to the udf initially defined in HiveFunctionRegistry.
// Then the function name comes from the Hive metastore in the format dbName_viewName_funcBaseName.
Expand Down

0 comments on commit aa086d1

Please sign in to comment.