Skip to content

Commit

Permalink
Change the table update calling method to alter_table_with_environmen…
Browse files Browse the repository at this point in the history
…tContext, and then set DO_NOT_UPDATE_STATS to prohibit updating hive table statistics.
  • Loading branch information
gang3.yang committed Nov 19, 2024
1 parent 8b77207 commit d5109b2
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,12 @@
import org.apache.flink.table.hive.LegacyHiveClasses;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
Expand Down Expand Up @@ -436,7 +438,7 @@ private TableSchema getDataTableSchema(Identifier identifier, Table table)
}

return tableSchemaInFileSystem(
getTableLocation(identifier, table), identifier.getBranchNameOrDefault())
getTableLocation(identifier, table), identifier.getBranchNameOrDefault())
.orElseThrow(() -> new TableNotExistException(identifier));
}

Expand Down Expand Up @@ -846,13 +848,18 @@ private void alterTableToHms(Table table, Identifier identifier, TableSchema new
updateHmsTablePars(table, newSchema);
Path location = getTableLocation(identifier, table);
updateHmsTable(table, identifier, newSchema, newSchema.options().get("provider"), location);
EnvironmentContext environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.CASCADE, "true");
environmentContext.putToProperties(
StatsSetupConst.DO_NOT_UPDATE_STATS,
options.getString(StatsSetupConst.DO_NOT_UPDATE_STATS, "false"));
clients.execute(
client ->
client.alter_table(
client.alter_table_with_environmentContext(
identifier.getDatabaseName(),
identifier.getTableName(),
table,
true));
environmentContext));
}

@Override
Expand Down Expand Up @@ -960,7 +967,7 @@ public String warehouse() {
return warehouse;
}

private Table getHmsTable(Identifier identifier) throws TableNotExistException {
public Table getHmsTable(Identifier identifier) throws TableNotExistException {
try {
return clients.run(
client ->
Expand All @@ -982,9 +989,9 @@ private Table getHmsTable(Identifier identifier) throws TableNotExistException {
private boolean isPaimonTable(Identifier identifier, Table table) {
return isPaimonTable(table)
&& tableSchemaInFileSystem(
getTableLocation(identifier, table),
identifier.getBranchNameOrDefault())
.isPresent();
getTableLocation(identifier, table),
identifier.getBranchNameOrDefault())
.isPresent();
}

private static boolean isPaimonTable(Table table) {
Expand Down Expand Up @@ -1341,4 +1348,4 @@ public static HiveConf createHiveConf(CatalogContext context) {
public static String possibleHiveConfPath() {
return System.getenv("HIVE_CONF_DIR");
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.hive;

import org.apache.paimon.catalog.CatalogTestBase;
import org.apache.paimon.catalog.Identifier;
import org.apache.paimon.options.Options;
import org.apache.paimon.schema.Schema;
import org.apache.paimon.schema.SchemaChange;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataTypes;

import org.apache.paimon.shade.guava30.com.google.common.collect.Lists;
import org.apache.paimon.shade.guava30.com.google.common.collect.Maps;

import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.Table;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.util.Collections;
import java.util.UUID;

import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTORECONNECTURLKEY;
import static org.assertj.core.api.Assertions.assertThat;

/** Verify that table stats has been updated. */
public class HiveTableStatsTest extends CatalogTestBase {

@BeforeEach
public void setUp() throws Exception {
super.setUp();
HiveConf hiveConf = new HiveConf();
String jdoConnectionURL = "jdbc:derby:memory:" + UUID.randomUUID();
hiveConf.setVar(METASTORECONNECTURLKEY, jdoConnectionURL + ";create=true");
String metastoreClientClass = "org.apache.hadoop.hive.metastore.HiveMetaStoreClient";
Options catalogOptions = new Options();
catalogOptions.set(StatsSetupConst.DO_NOT_UPDATE_STATS, "true");
catalog =
new HiveCatalog(fileIO, hiveConf, metastoreClientClass, catalogOptions, warehouse);
}

@Test
public void testAlterTable() throws Exception {
catalog.createDatabase("test_db", false);
// Alter table adds a new column to an existing table,but do not update stats
Identifier identifier = Identifier.create("test_db", "test_table");
catalog.createTable(
identifier,
new Schema(
Lists.newArrayList(new DataField(0, "col1", DataTypes.STRING())),
Collections.emptyList(),
Collections.emptyList(),
Maps.newHashMap(),
""),
false);
catalog.alterTable(
identifier,
Lists.newArrayList(
SchemaChange.addColumn("col2", DataTypes.DATE()),
SchemaChange.addColumn("col3", DataTypes.STRING(), "col3 field")),
false);
HiveCatalog hiveCatalog = (HiveCatalog) catalog;
Table table = hiveCatalog.getHmsTable(identifier);
assertThat(table.getParameters().get("COLUMN_STATS_ACCURATE")).isEqualTo(null);
}
}

0 comments on commit d5109b2

Please sign in to comment.