Skip to content

Commit

Permalink
Small backward compatibility updates for old versions of Hive (#121)
Browse files Browse the repository at this point in the history
  • Loading branch information
jphalip authored Jul 29, 2024
1 parent 486bbee commit b3ab687
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package com.google.cloud.hive.bigquery.connector;

import com.google.cloud.hive.bigquery.connector.config.HiveBigQueryConfig;
import java.util.*;
import javax.annotation.Nullable;
import org.apache.hadoop.conf.Configuration;
Expand All @@ -39,7 +40,7 @@ public static StructObjectInspector getRowObjectInspector(Configuration conf) {
String columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS);
String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES);
String columnNameDelimiter =
conf.get(serdeConstants.COLUMN_NAME_DELIMITER, String.valueOf(SerDeUtils.COMMA));
conf.get(HiveBigQueryConfig.HIVE_COLUMN_NAME_DELIMITER, String.valueOf(SerDeUtils.COMMA));
return getRowObjectInspector(columnNameProperty, columnTypeProperty, columnNameDelimiter);
}

Expand All @@ -48,7 +49,8 @@ public static StructObjectInspector getRowObjectInspector(Map<Object, Object> ta
String columnTypeProperty = (String) tableProperties.get(serdeConstants.LIST_COLUMN_TYPES);
String columnNameDelimiter =
String.valueOf(
tableProperties.getOrDefault(serdeConstants.COLUMN_NAME_DELIMITER, SerDeUtils.COMMA));
tableProperties.getOrDefault(
HiveBigQueryConfig.HIVE_COLUMN_NAME_DELIMITER, SerDeUtils.COMMA));
return getRowObjectInspector(columnNameProperty, columnTypeProperty, columnNameDelimiter);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ public class HiveBigQueryConfig
public static final String STREAM_FILE_EXTENSION = "stream";
public static final String JOB_DETAILS_FILE = "job-details.json";
public static final String QUERY_ID = "bq.connector.query.id";
// Normally this constant is defined in `serdeConstants.COLUMN_NAME_DELIMITER` but it's not
// available in Hive 1 (see:
// https://github.com/apache/hive/blob/release-1.2.1/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java),
// so we redefine it here:
public static final String HIVE_COLUMN_NAME_DELIMITER = "column.name.delimiter";

// For internal use only
public static final String CONNECTOR_IN_TEST = "hive.bq.connector.in.test";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,8 @@ public static InputSplit[] createSplitsFromBigQueryReadStreams(JobConf jobConf,

// Retrieve the table's column names
String columnNameDelimiter =
jobConf.get(serdeConstants.COLUMN_NAME_DELIMITER, String.valueOf(SerDeUtils.COMMA));
jobConf.get(
HiveBigQueryConfig.HIVE_COLUMN_NAME_DELIMITER, String.valueOf(SerDeUtils.COMMA));
List<String> columnNames =
new ArrayList<>(
Arrays.asList(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ public static DataFileWriter<GenericRecord> createDataFileWriter(
GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<>(schema);
DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(gdw);
int level =
jobConf.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, CodecFactory.DEFAULT_DEFLATE_LEVEL);
jobConf.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, 6); // 6 = XZCodec.DEFAULT_COMPRESSION
String codecName = jobConf.get(AvroJob.OUTPUT_CODEC, "deflate");
CodecFactory factory =
codecName.equals(DataFileConstants.DEFLATE_CODEC)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@

import com.google.cloud.hive.bigquery.connector.HiveCompat;
import com.google.cloud.hive.bigquery.connector.config.HiveBigQueryConfig;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.RoundingMode;
import java.nio.ByteBuffer;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.io.*;
Expand All @@ -36,25 +38,28 @@ public class BigQueryValueConverter {

// In Hive 1, HiveDecimal doesn't have a `bigIntegerBytesScaled()` method.
// We use this static variable to identify if the method is available.
private static boolean hasBigIntegerBytesScaled;
private static Method bigIntegerBytesScaled;

static {
try {
Method method = HiveDecimal.class.getMethod("bigIntegerBytesScaled", int.class);
hasBigIntegerBytesScaled = (method != null);
bigIntegerBytesScaled = HiveDecimal.class.getMethod("bigIntegerBytesScaled", int.class);
} catch (NoSuchMethodException e) {
hasBigIntegerBytesScaled = false;
bigIntegerBytesScaled = null;
}
}

public static byte[] getBigIntegerBytesScaled(HiveDecimal hiveDecimal, int scale) {
if (hasBigIntegerBytesScaled) {
if (bigIntegerBytesScaled != null) {
// Use bigIntegerBytesScaled if available
return hiveDecimal.bigIntegerBytesScaled(scale);
try {
return (byte[]) bigIntegerBytesScaled.invoke(hiveDecimal, scale);
} catch (IllegalAccessException | InvocationTargetException e) {
throw new RuntimeException(e);
}
} else {
// Alternative approach for older Hive versions
BigDecimal decimal = hiveDecimal.bigDecimalValue();
BigDecimal scaledDecimal = decimal.setScale(scale, BigDecimal.ROUND_HALF_UP);
BigDecimal scaledDecimal = decimal.setScale(scale, RoundingMode.HALF_UP);
BigInteger bigInt = scaledDecimal.unscaledValue();
return bigInt.toByteArray();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package com.google.cloud.hive.bigquery.connector.utils.hcatalog;

import com.google.cloud.hive.bigquery.connector.config.HiveBigQueryConfig;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -91,7 +92,7 @@ public static void updateTablePropertiesForHCatalog(
private static Properties createProperties(HCatTableInfo tableInfo) {
Properties properties = new Properties();
properties.put(hive_metastoreConstants.META_TABLE_LOCATION, tableInfo.getTableLocation());
properties.put(serdeConstants.COLUMN_NAME_DELIMITER, String.valueOf(SerDeUtils.COMMA));
properties.put(HiveBigQueryConfig.HIVE_COLUMN_NAME_DELIMITER, String.valueOf(SerDeUtils.COMMA));
properties.put(
serdeConstants.LIST_COLUMNS,
String.join(String.valueOf(SerDeUtils.COMMA), tableInfo.getDataColumns().getFieldNames()));
Expand Down

0 comments on commit b3ab687

Please sign in to comment.