Merge branch 'apache:master' into HIVE-28480
himanshu-mishra authored Sep 5, 2024
2 parents af93abf + e0bd9ea commit fe309b1
Showing 195 changed files with 13,433 additions and 8,436 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docker-GA-images.yml
@@ -119,7 +119,7 @@ jobs:
run: echo "TEZ_VERSION=$(mvn -f "pom.xml" -q help:evaluate -Dexpression=tez.version -DforceStdout)" >> $GITHUB_ENV

- name: Build project
-        run: mvn clean package -DskipTests -Pitests,iceberg -Pdist
+        run: mvn clean package -DskipTests -Pitests -Pdist

- name: Check for hive tar.gz
run: ls ./packaging/target/
6 changes: 3 additions & 3 deletions Jenkinsfile
@@ -95,7 +95,7 @@ export MAVEN_OPTS="-Xmx2g"
export -n HIVE_CONF_DIR
cp $SETTINGS .git/settings.xml
OPTS=" -s $PWD/.git/settings.xml -B -Dtest.groups= "
OPTS+=" -Pitests,qsplits,dist,errorProne,iceberg"
OPTS+=" -Pitests,qsplits,dist,errorProne"
OPTS+=" -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugin.surefire.SurefirePlugin=INFO"
OPTS+=" -Dmaven.repo.local=$PWD/.git/m2"
git config extra.mavenOpts "$OPTS"
@@ -314,14 +314,14 @@ time docker rm -f dev_$dbType || true
set -e
dev-support/nightly
'''
buildHive("install -Dtest=noMatches -Pdist -Piceberg -pl packaging -am")
buildHive("install -Dtest=noMatches -Pdist -pl packaging -am")
}
stage('Verify') {
sh '''#!/bin/bash
set -e
tar -xzf packaging/target/apache-hive-*-nightly-*-src.tar.gz
'''
buildHive("install -Dtest=noMatches -Pdist,iceberg -f apache-hive-*-nightly-*/pom.xml")
buildHive("install -Dtest=noMatches -Pdist -f apache-hive-*-nightly-*/pom.xml")
}
}
}
12 changes: 11 additions & 1 deletion common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2012,6 +2012,8 @@ public static enum ConfVars {
"Set to 1 to make sure hash aggregation is never turned off."),
HIVE_MAP_AGGR_HASH_MIN_REDUCTION_LOWER_BOUND("hive.map.aggr.hash.min.reduction.lower.bound", (float) 0.4,
"Lower bound of Hash aggregate reduction filter. See also: hive.map.aggr.hash.min.reduction"),
+    HIVE_MAP_AGGR_HASH_FLUSH_SIZE_PERCENT("hive.map.aggr.hash.flush.size.percent", (float) 0.1,
+        "Percentage of hash table entries to flush in map-side group aggregation."),
HIVE_MAP_AGGR_HASH_MIN_REDUCTION_STATS_ADJUST("hive.map.aggr.hash.min.reduction.stats", true,
"Whether the value for hive.map.aggr.hash.min.reduction should be set statically using stats estimates. \n" +
"If this is enabled, the default value for hive.map.aggr.hash.min.reduction is only used as an upper-bound\n" +
@@ -3863,8 +3865,16 @@ public static enum ConfVars {
"partition columns or non-partition columns while displaying columns in describe\n" +
"table. From 0.12 onwards, they are displayed separately. This flag will let you\n" +
"get old behavior, if desired. See, test-case in patch for HIVE-6689."),
+    @Deprecated
     HIVE_LINEAGE_INFO("hive.lineage.hook.info.enabled", false,
-        "Whether Hive provides lineage information to hooks."),
+        "Whether Hive provides lineage information to hooks. " +
+        "Deprecated: use hive.lineage.statement.filter instead."),
+    HIVE_LINEAGE_STATEMENT_FILTER("hive.lineage.statement.filter", "ALL",
+        "Whether Hive provides lineage information to hooks for the specified statements only, " +
+        "the value is a comma-separated list (ex.: CREATE_MATERIALIZED_VIEW," +
+        "CREATE_TABLE,CREATE_TABLE_AS_SELECT). Possible values are: CREATE_TABLE, CREATE_TABLE_AS_SELECT, " +
+        "CREATE_VIEW, CREATE_MATERIALIZED_VIEW, LOAD, QUERY, ALL, NONE." +
+        " ALL means lineage information is always provided, NONE and empty string means never."),

HIVE_SSL_PROTOCOL_BLACKLIST("hive.ssl.protocol.blacklist", "SSLv2,SSLv3",
"SSL Versions to disable for all Hive Servers"),
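
A quick usage sketch for the two properties added above (a minimal example, assuming a vanilla HiveConf instance; the ConfVars constants come from this diff, while setVar/setFloatVar are the standard HiveConf setters):

import org.apache.hadoop.hive.conf.HiveConf;

class LineageFilterExample {
  static HiveConf configure() {
    HiveConf conf = new HiveConf();
    // Flush 10% of the hash-table entries in map-side group aggregation (the default above).
    conf.setFloatVar(HiveConf.ConfVars.HIVE_MAP_AGGR_HASH_FLUSH_SIZE_PERCENT, 0.1f);
    // Emit lineage to hooks only for plain queries and CTAS; other statement types are skipped.
    conf.setVar(HiveConf.ConfVars.HIVE_LINEAGE_STATEMENT_FILTER, "QUERY,CREATE_TABLE_AS_SELECT");
    return conf;
  }
}
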
2 changes: 1 addition & 1 deletion iceberg/iceberg-catalog/pom.xml
@@ -84,7 +84,7 @@
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
-      <artifactId>junit-jupiter-api</artifactId>
+      <artifactId>junit-jupiter-engine</artifactId>
<scope>test</scope>
</dependency>
<dependency>
@@ -101,15 +101,6 @@ static void validateTableIsIceberg(Table table, String fullName) {
"Not an iceberg table: %s (type=%s)", fullName, tableType);
}

-  static boolean isHiveIcebergStorageHandler(String storageHandler) {
-    try {
-      Class<?> storageHandlerClass = Class.forName(storageHandler);
-      return Class.forName(HIVE_ICEBERG_STORAGE_HANDLER).isAssignableFrom(storageHandlerClass);
-    } catch (ClassNotFoundException e) {
-      throw new RuntimeException("Error checking storage handler class", e);
-    }
-  }

default void persistTable(Table hmsTable, boolean updateHiveTable, String metadataLocation)
throws TException, InterruptedException {
if (updateHiveTable) {
@@ -338,16 +338,7 @@ private void setHmsTableParameters(String newMetadataLocation, Table tbl, TableM
parameters.put(PREVIOUS_METADATA_LOCATION_PROP, currentMetadataLocation());
}

-    // If needed set the 'storage_handler' property to enable query from Hive
-    if (hiveEngineEnabled) {
-      String storageHandler = parameters.get(hive_metastoreConstants.META_TABLE_STORAGE);
-      // Check if META_TABLE_STORAGE is not present or is not an instance of ICEBERG_STORAGE_HANDLER
-      if (storageHandler == null || !HiveOperationsBase.isHiveIcebergStorageHandler(storageHandler)) {
-        parameters.put(hive_metastoreConstants.META_TABLE_STORAGE, HIVE_ICEBERG_STORAGE_HANDLER);
-      }
-    } else {
-      parameters.remove(hive_metastoreConstants.META_TABLE_STORAGE);
-    }
+    setStorageHandler(parameters, hiveEngineEnabled);

// Set the basic statistics
if (summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP) != null) {
@@ -368,6 +359,15 @@ private void setHmsTableParameters(String newMetadataLocation, Table tbl, TableM
tbl.setParameters(parameters);
}

+  private static void setStorageHandler(Map<String, String> parameters, boolean hiveEngineEnabled) {
+    // If needed set the 'storage_handler' property to enable query from Hive
+    if (hiveEngineEnabled) {
+      parameters.put(hive_metastoreConstants.META_TABLE_STORAGE, HiveOperationsBase.HIVE_ICEBERG_STORAGE_HANDLER);
+    } else {
+      parameters.remove(hive_metastoreConstants.META_TABLE_STORAGE);
+    }
+  }

@VisibleForTesting
void setSnapshotStats(TableMetadata metadata, Map<String, String> parameters) {
parameters.remove(TableProperties.CURRENT_SNAPSHOT_ID);
@@ -28,11 +28,13 @@
import java.util.Arrays;
import java.util.List;
import java.util.Map;
+ import java.util.stream.IntStream;
import org.apache.iceberg.expressions.BoundPredicate;
import org.apache.iceberg.expressions.BoundSetPredicate;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.ExpressionVisitors;
import org.apache.iceberg.expressions.UnboundPredicate;
+ import org.assertj.core.api.Assertions;
import org.junit.Assert;

public class TestHelpers {
@@ -84,23 +86,22 @@ public static <T> T roundTripSerialize(T type) throws IOException, ClassNotFound
}
}

-  // commented out because the schemaId() method was only added to org.apache.iceberg.Schema after the 0.11.0 release
-  // public static void assertSameSchemaList(List<Schema> list1, List<Schema> list2) {
-  //   if (list1.size() != list2.size()) {
-  //     Assert.fail("Should have same number of schemas in both lists");
-  //   }
-  //
-  //   IntStream.range(0, list1.size()).forEach(
-  //     index -> {
-  //       Schema schema1 = list1.get(index);
-  //       Schema schema2 = list2.get(index);
-  //       Assert.assertEquals("Should have matching schema id",
-  //           schema1.schemaId(), schema2.schemaId());
-  //       Assert.assertEquals("Should have matching schema struct",
-  //           schema1.asStruct(), schema2.asStruct());
-  //     }
-  //   );
-  // }
+  public static void assertSameSchemaList(List<Schema> list1, List<Schema> list2) {
+    Assertions.assertThat(list1)
+        .as("Should have same number of schemas in both lists")
+        .hasSameSizeAs(list2);
+
+    IntStream.range(0, list1.size())
+        .forEach(
+            index -> {
+              Schema schema1 = list1.get(index);
+              Schema schema2 = list2.get(index);
+              Assert.assertEquals(
+                  "Should have matching schema id", schema1.schemaId(), schema2.schemaId());
+              Assert.assertEquals(
+                  "Should have matching schema struct", schema1.asStruct(), schema2.asStruct());
+            });
+  }

public static void assertSerializedMetadata(Table expected, Table actual) {
Assert.assertEquals("Name must match", expected.name(), actual.name());
@@ -120,6 +121,26 @@ public static void assertSerializedAndLoadedMetadata(Table expected, Table actua
Assert.assertEquals("History must match", expected.history(), actual.history());
}

+  public static void assertSameSchemaMap(Map<Integer, Schema> map1, Map<Integer, Schema> map2) {
+    Assertions.assertThat(map1)
+        .as("Should have same number of schemas in both maps")
+        .hasSameSizeAs(map2);
+
+    map1.forEach(
+        (schemaId, schema1) -> {
+          Schema schema2 = map2.get(schemaId);
+          Assert.assertNotNull(
+              String.format("Schema ID %s does not exist in map: %s", schemaId, map2), schema2);
+
+          Assert.assertEquals(
+              "Should have matching schema id", schema1.schemaId(), schema2.schemaId());
+          Assert.assertTrue(
+              String.format(
+                  "Should be the same schema. Schema 1: %s, schema 2: %s", schema1, schema2),
+              schema1.sameSchema(schema2));
+        });
+  }

private static class CheckReferencesBound extends ExpressionVisitors.ExpressionVisitor<Void> {
private final String message;

@@ -103,6 +103,6 @@ public synchronized void testConcurrentConnections() throws InterruptedException

executorService.shutdown();
assertThat(executorService.awaitTermination(3, TimeUnit.MINUTES)).as("Timeout").isTrue();
-    assertThat(icebergTable.currentSnapshot().allManifests(icebergTable.io())).hasSize(20);
+    assertThat(icebergTable.snapshots()).hasSize(7);
}
}
@@ -105,7 +105,6 @@
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.ResidualEvaluator;
import org.apache.iceberg.hive.CachedClientPool;
- import org.apache.iceberg.hive.HiveOperationsBase;
import org.apache.iceberg.hive.HiveSchemaUtil;
import org.apache.iceberg.hive.HiveTableOperations;
import org.apache.iceberg.hive.MetastoreLock;
@@ -1075,7 +1074,7 @@ public void postGetTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
hmsTable.getSd().getSerdeInfo().setSerializationLib(HiveIcebergSerDe.class.getName());
String storageHandler = hmsTable.getParameters().get(hive_metastoreConstants.META_TABLE_STORAGE);
// Check if META_TABLE_STORAGE is not present or is not an instance of ICEBERG_STORAGE_HANDLER
-      if (storageHandler == null || !HiveOperationsBase.isHiveIcebergStorageHandler(storageHandler)) {
+      if (storageHandler == null || !isHiveIcebergStorageHandler(storageHandler)) {
hmsTable.getParameters()
.put(hive_metastoreConstants.META_TABLE_STORAGE, HiveTableOperations.HIVE_ICEBERG_STORAGE_HANDLER);
}
@@ -1085,6 +1084,15 @@ public void postGetTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
}
}

+  private static boolean isHiveIcebergStorageHandler(String storageHandler) {
+    try {
+      Class<?> storageHandlerClass = Class.forName(storageHandler);
+      return Class.forName(HIVE_ICEBERG_STORAGE_HANDLER).isAssignableFrom(storageHandlerClass);
+    } catch (ClassNotFoundException e) {
+      throw new RuntimeException("Error checking storage handler class", e);
+    }
+  }

@Override
public void preDropPartitions(org.apache.hadoop.hive.metastore.api.Table hmsTable,
EnvironmentContext context,
@@ -20,15 +20,18 @@

package org.apache.iceberg.mr.hive;

+ import java.lang.reflect.Method;
import java.util.Collections;
import java.util.List;
+ import java.util.Map;
import java.util.concurrent.Executors;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.exceptions.ValidationException;
+ import org.apache.iceberg.hive.HiveTableOperations;
import org.apache.iceberg.mr.TestHelper;
import org.apache.iceberg.relocated.com.google.common.base.Throwables;
import org.apache.iceberg.util.Tasks;
@@ -37,22 +40,34 @@
import org.junit.Assume;
import org.junit.Before;
import org.junit.Test;
+ import org.mockito.MockedStatic;

import static org.apache.iceberg.mr.hive.HiveIcebergStorageHandlerTestUtils.init;
+ import static org.mockito.ArgumentMatchers.anyMap;
+ import static org.mockito.ArgumentMatchers.eq;
+ import static org.mockito.Mockito.CALLS_REAL_METHODS;
+ import static org.mockito.Mockito.mockStatic;

public class TestConflictingDataFiles extends HiveIcebergStorageHandlerWithEngineBase {

private final String storageHandlerStub = "'org.apache.iceberg.mr.hive.HiveIcebergStorageHandlerStub'";

@Before
-  public void setUpTables() {
+  public void setUpTables() throws NoSuchMethodException {
PartitionSpec spec =
PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA).identity("last_name")
.bucket("customer_id", 16).build();

-    // create and insert an initial batch of records
-    testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec, fileFormat,
-        HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2, 2, Collections.emptyMap(), storageHandlerStub);
+    Method method = HiveTableOperations.class.getDeclaredMethod("setStorageHandler", Map.class, Boolean.TYPE);
+    method.setAccessible(true);

+    try (MockedStatic<HiveTableOperations> tableOps = mockStatic(HiveTableOperations.class, CALLS_REAL_METHODS)) {
+      tableOps.when(() -> method.invoke(null, anyMap(), eq(true)))
+          .thenAnswer(invocation -> null);
+      // create and insert an initial batch of records
+      testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec, fileFormat,
+          HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2, 2, Collections.emptyMap(), storageHandlerStub);
+    }
// insert one more batch so that we have multiple data files within the same partition
shell.executeStatement(testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1,
TableIdentifier.of("default", "customers"), false));
@@ -372,4 +372,25 @@ public void testCTASForAllColumnTypes() {
String ctas = "CREATE TABLE target STORED BY ICEBERG STORED AS orc AS SELECT * FROM source";
shell.executeStatement(ctas);
}

+  @Test
+  public void testCTASAndCTLTWithAuth() {
+    shell.setHiveSessionValue("hive.security.authorization.enabled", true);
+    shell.setHiveSessionValue("hive.security.authorization.manager",
+        "org.apache.iceberg.mr.hive.CustomTestHiveAuthorizerFactory");
+    shell.setHiveSessionValue("hive.security.authorization.tables.on.storagehandlers", true);
+    TableIdentifier identifier = TableIdentifier.of("default", "customers");
+    String query = String.format("CREATE EXTERNAL TABLE customers (" +
+        "customer_id BIGINT," +
+        "first_name STRING, " +
+        "last_name STRING," +
+        "primary key (customer_id, first_name) disable novalidate) " +
+        "STORED BY ICEBERG %s TBLPROPERTIES ('%s'='%s')",
+        testTables.locationForCreateTableSQL(identifier),
+        InputFormatConfig.CATALOG_NAME,
+        testTables.catalogName());
+    shell.executeStatement(query);
+    shell.executeStatement("create table target_ctas stored by iceberg stored as orc as select * from customers");
+    shell.executeStatement("create table target_ctlt like customers stored by iceberg");
+  }
}
11 changes: 0 additions & 11 deletions iceberg/pom.xml
@@ -35,7 +35,6 @@
<google.errorprone.javac.version>9+181-r4173-1</google.errorprone.javac.version>
<google.errorprone.version>2.5.1</google.errorprone.version>
<assertj.version>3.24.2</assertj.version>
-    <junit.jupiter.version>5.10.0</junit.jupiter.version>
<awaitility.version>4.2.1</awaitility.version>
<immutables.value.version>2.10.0</immutables.value.version>
<validate.skip>false</validate.skip>
@@ -203,16 +202,6 @@
<artifactId>assertj-core</artifactId>
<version>${assertj.version}</version>
</dependency>
-    <dependency>
-      <groupId>org.junit.jupiter</groupId>
-      <artifactId>junit-jupiter-api</artifactId>
-      <version>${junit.jupiter.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.junit.jupiter</groupId>
-      <artifactId>junit-jupiter-params</artifactId>
-      <version>${junit.jupiter.version}</version>
-    </dependency>
<dependency>
<groupId>org.awaitility</groupId>
<artifactId>awaitility</artifactId>
@@ -1056,6 +1056,12 @@ public List<String> getFunctions(String catName, String dbName, String pattern)
return objectStore.getFunctions(catName, dbName, pattern);
}

+  @Override
+  public <T> List<T> getFunctionsRequest(String catName, String dbName,
+      String pattern, boolean isReturnNames) throws MetaException {
+    return objectStore.getFunctionsRequest(catName, dbName, pattern, isReturnNames);
+  }

@Override
public AggrStats get_aggr_stats_for(String catName, String dbName,
String tblName, List<String> partNames, List<String> colNames,
@@ -680,6 +680,33 @@ private void checkSingleViewInput(List<HivePrivilegeObject> inputs) {
assertTrue("table name", viewName.equalsIgnoreCase(tableObj.getObjectName()));
}

+  @Test
+  public void DropDatabaseCascade() throws Exception {
+    String dbName = "dropDatabase";
+    reset(mockedAuthorizer);
+    driver.run("create database " + dbName);
+    final String tableName1 = "drop_tbl1", tableName2 = "drop_tbl2", funcName = "testdropfunc";
+    driver.run("create table " + dbName + "." + tableName1 + "(eid int, yoj int)");
+    driver.run("create table " + dbName + "." + tableName2 + "(eid int, name string)");
+    driver.run("create function " + dbName + "." + funcName + " as 'org.apache.hadoop.hive.ql.udf.UDFRand'");
+    reset(mockedAuthorizer);
+    int status = driver.compile("DROP DATABASE " + dbName + " CASCADE", true);
+    assertEquals(0, status);
+    Pair<List<HivePrivilegeObject>, List<HivePrivilegeObject>> io = getHivePrivilegeObjectInputs();
+    List<HivePrivilegeObject> inputs = io.getLeft();
+    assertEquals(1, inputs.size()); // database object
+    List<HivePrivilegeObject> outputs = io.getRight();
+    assertEquals(4, outputs.size()); // 2 tables, 1 function and 1 db
+    HivePrivilegeObject privilegeObject = outputs.get(0);
+    assertEquals("output type", HivePrivilegeObjectType.TABLE_OR_VIEW, privilegeObject.getType());
+    privilegeObject = outputs.get(1);
+    assertEquals("output type", HivePrivilegeObjectType.TABLE_OR_VIEW, privilegeObject.getType());
+    privilegeObject = outputs.get(2);
+    assertEquals("output type", HivePrivilegeObjectType.FUNCTION, privilegeObject.getType());
+    privilegeObject = outputs.get(3);
+    assertEquals("output type", HivePrivilegeObjectType.DATABASE, privilegeObject.getType());
+  }

/**
* @return pair with left value as inputs and right value as outputs,
* passed in current call to authorizer.checkPrivileges
Expand Down
Loading

0 comments on commit fe309b1

Please sign in to comment.