[test](migrate) move 2 cases from p2 to p0 (apache#36787)
- test_external_credit_data
- test_upper_case_column_name
Only move the Hive-related cases; the Iceberg-related ones will be moved in the next PR.
morningman authored Jun 27, 2024
1 parent 1d2c42c commit a89bb1d
Showing 39 changed files with 685 additions and 170 deletions.
2 changes: 1 addition & 1 deletion .licenserc.yaml
@@ -82,7 +82,7 @@ header:
- "docker/thirdparties/docker-compose/hive/scripts/create_tpch1_orc.hql"
- "docker/thirdparties/docker-compose/hive/scripts/create_tpch1_parquet.hql"
- "docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/"
- "docker/thirdparties/docker-compose/hive/scripts/suites/**"
- "docker/thirdparties/docker-compose/hive/scripts/data/**"
- "docker/thirdparties/docker-compose/iceberg/spark-defaults.conf.tpl"
- "conf/mysql_ssl_default_certificate/*"
- "conf/mysql_ssl_default_certificate/client_certificate/ca.pem"
@@ -0,0 +1,20 @@
create database if not exists multi_catalog;

use multi_catalog;

CREATE external TABLE `datev2_csv`(
`id` int,
`day` date)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'/user/doris/suites/multi_catalog/datev2_csv'
TBLPROPERTIES (
'transient_lastDdlTime'='1688118691');

msck repair table datev2_csv;

Binary file not shown.
@@ -0,0 +1,13 @@
#!/bin/bash
set -x

CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

## mkdir and put data to hdfs
cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/

# create table
hive -f "${CUR_DIR}/create_table.hql"

@@ -0,0 +1,20 @@
create database if not exists multi_catalog;

use multi_catalog;

CREATE external TABLE `datev2_orc`(
`id` int,
`day` date)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'/user/doris/suites/multi_catalog/datev2_orc'
TBLPROPERTIES (
'transient_lastDdlTime'='1688118707');

msck repair table datev2_orc;

Binary file not shown.
@@ -0,0 +1,13 @@
#!/bin/bash
set -x

CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

## mkdir and put data to hdfs
cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/

# create table
hive -f "${CUR_DIR}"/create_table.hql

@@ -0,0 +1,20 @@
create database if not exists multi_catalog;

use multi_catalog;

CREATE external TABLE `datev2_parquet`(
`id` int,
`day` date)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/suites/multi_catalog/datev2_parquet'
TBLPROPERTIES (
'transient_lastDdlTime'='1688118725');

msck repair table datev2_parquet;

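The three datev2_* tables above (text, ORC, and Parquet) share the same two-column schema and differ only in storage format. As a minimal sketch, this is the kind of date-filter query a regression case might run once msck repair has registered the data (the database name comes from the DDL above; the literal date is illustrative, not taken from the archive):

use multi_catalog;
-- the same predicate should return identical rows from all three formats
select `id`, `day` from datev2_csv where `day` = '2023-06-01';
select `id`, `day` from datev2_orc where `day` = '2023-06-01';
select `id`, `day` from datev2_parquet where `day` = '2023-06-01';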
Binary file not shown.
@@ -0,0 +1,13 @@
#!/bin/bash
set -x

CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

## mkdir and put data to hdfs
cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/

# create table
hive -f "${CUR_DIR}"/create_table.hql

@@ -0,0 +1,21 @@
create database if not exists multi_catalog;
use multi_catalog;

CREATE TABLE `hive_upper_case_orc`(
`id` int,
`name` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'/user/doris/suites/multi_catalog/hive_upper_case_orc'
TBLPROPERTIES (
'spark.sql.create.version'='3.2.1',
'spark.sql.sources.schema'='{"type":"struct","fields":[{"name":"ID","type":"integer","nullable":true,"metadata":{}},{"name":"NAME","type":"string","nullable":true,"metadata":{}}]}',
'transient_lastDdlTime'='1674189057');

msck repair table hive_upper_case_orc;

Binary file not shown.
@@ -0,0 +1,13 @@
#!/bin/bash
set -x

CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

## mkdir and put data to hdfs
cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/

# create table
hive -f "${CUR_DIR}"/create_table.hql

@@ -0,0 +1,21 @@
create database if not exists multi_catalog;
use multi_catalog;

CREATE TABLE `hive_upper_case_parquet`(
`id` int,
`name` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/suites/multi_catalog/hive_upper_case_parquet'
TBLPROPERTIES (
'spark.sql.create.version'='3.2.1',
'spark.sql.sources.schema'='{"type":"struct","fields":[{"name":"ID","type":"integer","nullable":true,"metadata":{}},{"name":"NAME","type":"string","nullable":true,"metadata":{}}]}',
'transient_lastDdlTime'='1674189051');

msck repair table hive_upper_case_parquet;

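The hive_upper_case_orc and hive_upper_case_parquet tables back the migrated test_upper_case_column_name case: the Spark-written schema in TBLPROPERTIES records the columns as upper-case ID and NAME, while the Hive metastore columns are lower-case. A hedged sketch of the sort of check the case performs from the Doris side (SWITCH follows Doris' multi-catalog syntax; the catalog name is an assumption, and the real assertions live in the regression suite):

-- hypothetical external Hive catalog created by the regression suite
switch test_hive_catalog;
use multi_catalog;
-- lower-case identifiers should still resolve despite the upper-case Spark schema
select id, name from hive_upper_case_orc order by id;
select id, name from hive_upper_case_parquet order by id;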
Binary file not shown.
@@ -0,0 +1,13 @@
#!/bin/bash
set -x

CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

## mkdir and put data to hdfs
cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/

# create table
hive -f "${CUR_DIR}"/create_table.hql

@@ -0,0 +1,31 @@
create database if not exists multi_catalog;
use multi_catalog;

CREATE TABLE `parquet_lz4_compression`(
`col_int` int,
`col_smallint` smallint,
`col_tinyint` tinyint,
`col_bigint` bigint,
`col_float` float,
`col_double` double,
`col_boolean` boolean,
`col_string` string,
`col_char` char(10),
`col_varchar` varchar(25),
`col_date` date,
`col_timestamp` timestamp,
`col_decimal` decimal(10,2))
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/suites/multi_catalog/parquet_lz4_compression'
TBLPROPERTIES (
'parquet.compression'='LZ4',
'transient_lastDdlTime'='1700723950');

msck repair table parquet_lz4_compression;

Binary file not shown.
@@ -0,0 +1,13 @@
#!/bin/bash
set -x

CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

## mkdir and put data to hdfs
cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/

# create table
hive -f "${CUR_DIR}"/create_table.hql

@@ -0,0 +1,31 @@
create database if not exists multi_catalog;
use multi_catalog;

CREATE TABLE `parquet_lzo_compression`(
`col_int` int,
`col_smallint` smallint,
`col_tinyint` tinyint,
`col_bigint` bigint,
`col_float` float,
`col_double` double,
`col_boolean` boolean,
`col_string` string,
`col_char` char(10),
`col_varchar` varchar(25),
`col_date` date,
`col_timestamp` timestamp,
`col_decimal` decimal(10,2))
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/suites/multi_catalog/parquet_lzo_compression'
TBLPROPERTIES (
'parquet.compression'='LZO',
'transient_lastDdlTime'='1701173147');

msck repair table parquet_lzo_compression;

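The two tables above pin parquet.compression to LZ4 and LZO in TBLPROPERTIES, so the staged Parquet files use those codecs and any reader has to decode them. A minimal sketch of a Hive-side scan that exercises the decompression path (the aggregated columns are an arbitrary choice):

use multi_catalog;
-- full scans force every column chunk to be decompressed
select count(*), min(col_date), max(col_timestamp) from parquet_lz4_compression;
select count(*), min(col_date), max(col_timestamp) from parquet_lzo_compression;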
Binary file not shown.
@@ -0,0 +1,13 @@
#!/bin/bash
set -x

CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

## mkdir and put data to hdfs
cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/

# create table
hive -f "${CUR_DIR}"/create_table.hql

@@ -144,63 +144,9 @@ STORED AS INPUTFORMAT
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'/user/doris/suites/test_hive_compress_type/test_compress_partitioned'
'/user/doris/suites/multi_catalog/test_compress_partitioned'
TBLPROPERTIES (
'transient_lastDdlTime'='1692589095');

CREATE TABLE `parquet_lz4_compression`(
`col_int` int,
`col_smallint` smallint,
`col_tinyint` tinyint,
`col_bigint` bigint,
`col_float` float,
`col_double` double,
`col_boolean` boolean,
`col_string` string,
`col_char` char(10),
`col_varchar` varchar(25),
`col_date` date,
`col_timestamp` timestamp,
`col_decimal` decimal(10,2))
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/suites/test_hive_compress_type/parquet_lz4_compression'
TBLPROPERTIES (
'parquet.compression'='LZ4',
'transient_lastDdlTime'='1700723950');

CREATE TABLE `parquet_lzo_compression`(
`col_int` int,
`col_smallint` smallint,
`col_tinyint` tinyint,
`col_bigint` bigint,
`col_float` float,
`col_double` double,
`col_boolean` boolean,
`col_string` string,
`col_char` char(10),
`col_varchar` varchar(25),
`col_date` date,
`col_timestamp` timestamp,
`col_decimal` decimal(10,2))
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/suites/test_hive_compress_type/parquet_lzo_compression'
TBLPROPERTIES (
'parquet.compression'='LZO',
'transient_lastDdlTime'='1701173147');

msck repair table test_compress_partitioned;
msck repair table parquet_lz4_compression;
msck repair table parquet_lzo_compression;

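With the LZ4/LZO tables split out into their own data directories above, this script now only provisions test_compress_partitioned, relocated under the multi_catalog path. A quick, assumed follow-up check that partition discovery worked after msck repair (partition values are whatever the data archive provides):

use multi_catalog;
show partitions test_compress_partitioned;
select count(*) from test_compress_partitioned;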
@@ -0,0 +1,22 @@
#!/bin/bash
set -x

CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

if [[ ! -d "${CUR_DIR}/data" ]]; then
echo "${CUR_DIR}/data does not exist"
cd "${CUR_DIR}" && rm -f data.tar.gz \
&& curl -O https://s3BucketName.s3Endpoint/regression/datalake/pipeline_data/multi_catalog/test_compress_partitioned/data.tar.gz \
&& tar xzf data.tar.gz
cd -
else
echo "${CUR_DIR}/data exist, continue !"
fi

## mkdir and put data to hdfs
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/

# create table
hive -f "${CUR_DIR}"/create_table.hql
