
Commit 989a433
[Hive] Avoid excessive HMS memory usage when executing AlterTable for a Paimon table containing a large number of fields (#4549)
GangYang-HX authored Nov 24, 2024
1 parent 16a4058 commit 989a433
Showing 4 changed files with 128 additions and 3 deletions.
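
In short: alterTableToHms now calls alter_table_with_environmentContext instead of alter_table, passing an EnvironmentContext that preserves the old cascade behavior (StatsSetupConst.CASCADE) and forwards Hive's DO_NOT_UPDATE_STATS flag from the catalog options, so the metastore can skip the statistics update that exhausted memory for tables with many fields. A minimal sketch of the opt-out, mirroring the new HiveTableStatsTest below (the warehouse path is illustrative; the option key is Hive's StatsSetupConst.DO_NOT_UPDATE_STATS):

    import org.apache.hadoop.hive.common.StatsSetupConst;
    import org.apache.paimon.options.CatalogOptions;
    import org.apache.paimon.options.Options;

    // With this flag set, every alter issued through the Hive catalog carries
    // DO_NOT_UPDATE_STATS in its EnvironmentContext, and the HMS leaves the
    // table statistics untouched.
    Options catalogOptions = new Options();
    catalogOptions.set(CatalogOptions.WAREHOUSE, "file:///tmp/warehouse"); // illustrative
    catalogOptions.set(StatsSetupConst.DO_NOT_UPDATE_STATS, "true");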
@@ -56,10 +56,12 @@
 import org.apache.flink.table.hive.LegacyHiveClasses;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
@@ -85,6 +87,7 @@
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
 import java.util.function.Function;
@@ -875,11 +878,23 @@ private void alterTableToHms(Table table, Identifier identifier, TableSchema newSchema)
         updateHmsTable(table, identifier, newSchema, newSchema.options().get("provider"), location);
         clients.execute(
                 client ->
-                        client.alter_table(
+                        client.alter_table_with_environmentContext(
                                 identifier.getDatabaseName(),
                                 identifier.getTableName(),
                                 table,
-                                true));
+                                createHiveEnvironmentContext()));
     }
+
+    private EnvironmentContext createHiveEnvironmentContext() {
+        EnvironmentContext environmentContext = new EnvironmentContext();
+        environmentContext.putToProperties(StatsSetupConst.CASCADE, "true");
+        if (Objects.isNull(options)) {
+            return environmentContext;
+        }
+        environmentContext.putToProperties(
+                StatsSetupConst.DO_NOT_UPDATE_STATS,
+                options.getString(StatsSetupConst.DO_NOT_UPDATE_STATS, "false"));
+        return environmentContext;
+    }

     @Override
@@ -1001,7 +1016,7 @@ public String warehouse() {
         return warehouse;
     }

-    private Table getHmsTable(Identifier identifier) throws TableNotExistException {
+    public Table getHmsTable(Identifier identifier) throws TableNotExistException {
         try {
             return clients.run(
                     client ->
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.hive;
+
+import org.apache.paimon.catalog.Catalog;
+import org.apache.paimon.catalog.CatalogContext;
+import org.apache.paimon.catalog.Identifier;
+import org.apache.paimon.fs.FileIO;
+import org.apache.paimon.fs.Path;
+import org.apache.paimon.options.CatalogOptions;
+import org.apache.paimon.options.Options;
+import org.apache.paimon.schema.Schema;
+import org.apache.paimon.schema.SchemaChange;
+import org.apache.paimon.types.DataField;
+import org.apache.paimon.types.DataTypes;
+
+import org.apache.paimon.shade.guava30.com.google.common.collect.Lists;
+import org.apache.paimon.shade.guava30.com.google.common.collect.Maps;
+
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.util.Collections;
+import java.util.UUID;
+
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTORECONNECTURLKEY;
+import static org.assertj.core.api.Assertions.assertThat;
+
+/** Verify that table stats are not updated when DO_NOT_UPDATE_STATS is set. */
+public class HiveTableStatsTest {
+    @TempDir java.nio.file.Path tempFile;
+    protected Catalog catalog;
+
+    @BeforeEach
+    public void setUp() throws Exception {
+        String warehouse = tempFile.toUri().toString();
+        HiveConf hiveConf = new HiveConf();
+        String jdoConnectionURL = "jdbc:derby:memory:" + UUID.randomUUID();
+        hiveConf.setVar(METASTORECONNECTURLKEY, jdoConnectionURL + ";create=true");
+        String metastoreClientClass = "org.apache.hadoop.hive.metastore.HiveMetaStoreClient";
+        Options catalogOptions = new Options();
+        catalogOptions.set(StatsSetupConst.DO_NOT_UPDATE_STATS, "true");
+        catalogOptions.set(CatalogOptions.WAREHOUSE, warehouse);
+        CatalogContext catalogContext = CatalogContext.create(catalogOptions);
+        FileIO fileIO = FileIO.get(new Path(warehouse), catalogContext);
+        catalog =
+                new HiveCatalog(fileIO, hiveConf, metastoreClientClass, catalogOptions, warehouse);
+    }
+
+    @Test
+    public void testAlterTable() throws Exception {
+        catalog.createDatabase("test_db", false);
+        // Alter table adds new columns to an existing table, but does not update stats
+        Identifier identifier = Identifier.create("test_db", "test_table");
+        catalog.createTable(
+                identifier,
+                new Schema(
+                        Lists.newArrayList(new DataField(0, "col1", DataTypes.STRING())),
+                        Collections.emptyList(),
+                        Collections.emptyList(),
+                        Maps.newHashMap(),
+                        ""),
+                false);
+        catalog.alterTable(
+                identifier,
+                Lists.newArrayList(
+                        SchemaChange.addColumn("col2", DataTypes.DATE()),
+                        SchemaChange.addColumn("col3", DataTypes.STRING(), "col3 field")),
+                false);
+        HiveCatalog hiveCatalog = (HiveCatalog) catalog;
+        Table table = hiveCatalog.getHmsTable(identifier);
+        assertThat(table.getParameters().get("COLUMN_STATS_ACCURATE")).isEqualTo(null);
+    }
+}
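
Note on the assertion above: the HMS writes COLUMN_STATS_ACCURATE into the table parameters when it updates basic stats during an alter; with DO_NOT_UPDATE_STATS set to "true" that update is skipped, so the test expects the parameter to stay absent (null).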
@@ -22,6 +22,7 @@
 import org.apache.hadoop.hive.metastore.HiveMetaHookLoader;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Table;
@@ -51,4 +52,11 @@ public void alter_table(
             throws InvalidOperationException, MetaException, TException {
         throw new TException();
     }
+
+    @Override
+    public void alter_table_with_environmentContext(
+            String defaultDatabaseName, String tblName, Table table, EnvironmentContext env)
+            throws InvalidOperationException, MetaException, TException {
+        throw new TException();
+    }
 }
@@ -22,6 +22,7 @@
 import org.apache.hadoop.hive.metastore.HiveMetaHookLoader;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Table;
@@ -51,4 +52,11 @@ public void alter_table(
             throws InvalidOperationException, MetaException, TException {
         throw new TException();
     }
+
+    @Override
+    public void alter_table_with_environmentContext(
+            String defaultDatabaseName, String tblName, Table table, EnvironmentContext env)
+            throws InvalidOperationException, MetaException, TException {
+        throw new TException();
+    }
 }
