From 37f8ab424a3d5a8a018e69a2728d415549e2bc05 Mon Sep 17 00:00:00 2001 From: Yukang-Lian Date: Tue, 17 Dec 2024 11:05:22 +0800 Subject: [PATCH] 2 --- .../test_cloud_follower_show_data.groovy | 288 +++++++++++ .../test_cloud_mtmv_show_data.groovy | 381 ++++++++++++++ ...hange_add_and_drop_column_show_data.groovy | 382 ++++++++++++++ ...change_add_and_drop_index_show_data.groovy | 382 ++++++++++++++ ...ema_change_reorder_column_show_data.groovy | 355 +++++++++++++ ...t_cloud_delete_table_rows_show_data.groovy | 426 ++++++++++++++++ ...test_cloud_drop_partition_show_data.groovy | 353 +++++++++++++ .../test_cloud_drop_table_show_data.groovy | 426 ++++++++++++++++ ...t_cloud_recover_partition_show_data.groovy | 379 ++++++++++++++ .../test_cloud_recover_table_show_data.groovy | 465 ++++++++++++++++++ ...test_cloud_truncate_table_show_data.groovy | 426 ++++++++++++++++ ..._cloud_disable_compaction_show_data.groovy | 263 ++++++++++ ...t_cloud_inverted_index_v1_show_data.groovy | 267 ++++++++++ ...t_cloud_inverted_index_v2_show_data.groovy | 267 ++++++++++ .../test_cloud_lz4_show_data.groovy | 264 ++++++++++ .../test_cloud_zstd_show_data.groovy | 264 ++++++++++ .../test_cloud_agg_show_data.groovy | 263 ++++++++++ .../test_cloud_dup_show_data.groovy | 263 ++++++++++ .../test_cloud_mor_show_data.groovy | 264 ++++++++++ ..._cloud_mow_partial_update_show_data.groovy | 293 +++++++++++ .../test_cloud_mow_show_data.groovy} | 6 +- 21 files changed, 6674 insertions(+), 3 deletions(-) create mode 100644 regression-test/suites/show_data_p2/test_cloud_follower_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_modification/test_cloud_mtmv_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_add_and_drop_column_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_add_and_drop_index_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_reorder_column_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_operation/test_cloud_delete_table_rows_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_operation/test_cloud_drop_partition_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_operation/test_cloud_drop_table_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_operation/test_cloud_recover_partition_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_operation/test_cloud_recover_table_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_operation/test_cloud_truncate_table_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_property/test_cloud_disable_compaction_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_property/test_cloud_inverted_index_v1_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_property/test_cloud_inverted_index_v2_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_property/test_cloud_lz4_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_property/test_cloud_zstd_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_type/test_cloud_agg_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_type/test_cloud_dup_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_type/test_cloud_mor_show_data.groovy create mode 100644 regression-test/suites/show_data_p2/test_table_type/test_cloud_mow_partial_update_show_data.groovy rename regression-test/suites/show_data_p2/{test_show_mow_data.groovy => test_table_type/test_cloud_mow_show_data.groovy} (98%) diff --git a/regression-test/suites/show_data_p2/test_cloud_follower_show_data.groovy b/regression-test/suites/show_data_p2/test_cloud_follower_show_data.groovy new file mode 100644 index 00000000000000..6b1a24eb0171e7 --- /dev/null +++ b/regression-test/suites/show_data_p2/test_cloud_follower_show_data.groovy @@ -0,0 +1,288 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_follower_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def get_follower_ip = { + def result = sql """show frontends;""" + logger.info("result:" + result) + for (int i = 0; i < result.size(); i++) { + if (result[i][7] == "FOLLOWER" && result[i][8] == "false" && result[i][11] == "true") { + return result[i][1] + } + } + return "null" + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table, String url-> + connect("root", "", url) { + try { + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } catch (Exception e) { + log.info(e.getMessage()) + } + } + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def main = { + def switch_ip = get_follower_ip() + def url + if (switch_ip != "null") { + def tokens = context.config.jdbcUrl.split('/') + url = tokens[0] + "//" + tokens[2] + "/" + "information_schema" + "?" + def new_jdbc_url = url.replaceAll(/\/\/[0-9.]+:/, "//${switch_ip}:") + logger.info("new_jdbc_url: " + new_jdbc_url) + } + tableName="test_cloud_mow_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName, url)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + main() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_modification/test_cloud_mtmv_show_data.groovy b/regression-test/suites/show_data_p2/test_table_modification/test_cloud_mtmv_show_data.groovy new file mode 100644 index 00000000000000..8df8591a70855f --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_modification/test_cloud_mtmv_show_data.groovy @@ -0,0 +1,381 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_mtmv_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + String db = context.config.getDbNameByFile(context.file) + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def main = { + tableName="test_cloud_mtmv_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + def mtmvName="test_cloud_mtmv_show_data_mtmv" + sql """DROP MATERIALIZED VIEW IF EXISTS ${mtmvName}""" + sql""" + CREATE MATERIALIZED VIEW ${mtmvName} + BUILD DEFERRED REFRESH COMPLETE ON MANUAL + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + select * from + (select 1 as l_orderkey,'2023-12-10' as l_shipdate) as c_lineitem + left join orders on c_lineitem.l_orderkey = orders.o_orderkey and c_lineitem.l_shipdate = o_orderdate; + """ + sql """refresh materialized view ${mtmvName} auto;""" + def job_name = getJobName(db, "${mtmvName}"); + waitingMTMVTaskFinished(job_name) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + def main_index = { + tableName="test_cloud_mtmv_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL, + index index_SHIPINSTRUCT (L_SHIPINSTRUCT) using inverted, + index index_SHIPMODE (L_SHIPMODE) using inverted, + index index_COMMENT (L_COMMENT) using inverted + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + def mtmvName="test_cloud_mtmv_show_data_mtmv_index" + sql """DROP MATERIALIZED VIEW IF EXISTS ${mtmvName}""" + sql""" + CREATE MATERIALIZED VIEW ${mtmvName} + BUILD DEFERRED REFRESH COMPLETE ON MANUAL + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + select * from + (select 1 as l_orderkey,'2023-12-10' as l_shipdate) as c_lineitem + left join orders on c_lineitem.l_orderkey = orders.o_orderkey and c_lineitem.l_shipdate = o_orderdate; + """ + sql """refresh materialized view ${mtmvName} auto;""" + def job_name = getJobName(db, "${mtmvName}"); + waitingMTMVTaskFinished(job_name) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + main() + main_index() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_add_and_drop_column_show_data.groovy b/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_add_and_drop_column_show_data.groovy new file mode 100644 index 00000000000000..760984564654ac --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_add_and_drop_column_show_data.groovy @@ -0,0 +1,382 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_schema_change_add_and_drop_column_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def schema_change_add_column = { String tableName, Map sizeRecords, List> tablets -> + sql """ + ALTER TABLE ${tableName} add column l_test int after L_COMMENT; + """ + + waitForSchemaChangeDone { + sql """ SHOW ALTER TABLE COLUMN WHERE TableName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ + time 600 + } + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after add column, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][-1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][-1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][-1]) + } + + def schema_change_drop_column = { String tableName, Map sizeRecords, List> tablets -> + sql """ + ALTER TABLE ${tableName} drop column L_COMMENT; + """ + + waitForSchemaChangeDone { + sql """ SHOW ALTER TABLE COLUMN WHERE TableName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ + time 600 + } + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after drop column, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][-1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][-1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][-1]) + } + + def main = { + tableName="test_cloud_schema_change_add_and_drop_column_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + schema_change_add_column(tableName, sizeRecords, tablets) + schema_change_drop_column(tableName, sizeRecords, tablets) + } + + def main_index = { + tableName="test_cloud_schema_change_add_and_drop_column_show_data_index" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL, + index index_SHIPINSTRUCT (L_SHIPINSTRUCT) using inverted, + index index_SHIPMODE (L_SHIPMODE) using inverted, + index index_COMMENT (L_COMMENT) using inverted + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + schema_change_add_column(tableName, sizeRecords, tablets) + schema_change_drop_column(tableName, sizeRecords, tablets) + } + + main() + main_index() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_add_and_drop_index_show_data.groovy b/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_add_and_drop_index_show_data.groovy new file mode 100644 index 00000000000000..35c63c1ec52a2f --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_add_and_drop_index_show_data.groovy @@ -0,0 +1,382 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_schema_change_add_and_drop_index_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def schema_change_add_index = { String tableName, Map sizeRecords, List> tablets -> + sql """ + ALTER TABLE ${tableName} add index index1 (L_LINESTATUS) using inverted; + """ + + waitForSchemaChangeDone { + sql """ SHOW ALTER TABLE index WHERE TableName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ + time 600 + } + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after add column, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][-1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][-1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][-1]) + } + + def schema_change_drop_index = { String tableName, Map sizeRecords, List> tablets -> + sql """ + ALTER TABLE ${tableName} drop index index1; + """ + + waitForSchemaChangeDone { + sql """ SHOW ALTER TABLE index WHERE TableName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ + time 600 + } + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after drop column, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][-1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][-1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][-1]) + } + + def main = { + tableName="test_cloud_schema_change_add_and_drop_index_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + schema_change_add_index(tableName, sizeRecords, tablets) + schema_change_drop_index(tableName, sizeRecords, tablets) + } + + def main_index = { + tableName="test_cloud_schema_change_add_and_drop_index_show_data_index" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL, + index index_SHIPINSTRUCT (L_SHIPINSTRUCT) using inverted, + index index_SHIPMODE (L_SHIPMODE) using inverted, + index index_COMMENT (L_COMMENT) using inverted + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + schema_change_add_index(tableName, sizeRecords, tablets) + schema_change_drop_index(tableName, sizeRecords, tablets) + } + + main() + main_index() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_reorder_column_show_data.groovy b/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_reorder_column_show_data.groovy new file mode 100644 index 00000000000000..4f4cc1e36eb1d0 --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_reorder_column_show_data.groovy @@ -0,0 +1,355 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_schema_change_reorder_column_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def schema_change_reorder_column= { String tableName, Map sizeRecords, List> tablets -> + sql """ + ALTER TABLE ${tableName} modify column L_SHIPMODE CHAR(10) NOT NULL after L_COMMENT; + """ + + waitForSchemaChangeDone { + sql """ SHOW ALTER TABLE column WHERE TableName='${tableName}' ORDER BY createtime DESC LIMIT 1 """ + time 600 + } + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after add column, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][-1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][-1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][-1]) + } + + def main = { + tableName="test_cloud_schema_change_reorder_column_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + schema_change_reorder_column(tableName, sizeRecords, tablets) + } + + def main_index = { + tableName="test_cloud_schema_change_reorder_column_show_data_index" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL, + index index_SHIPINSTRUCT (L_SHIPINSTRUCT) using inverted, + index index_SHIPMODE (L_SHIPMODE) using inverted, + index index_COMMENT (L_COMMENT) using inverted + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + schema_change_reorder_column(tableName, sizeRecords, tablets) + } + + main() + main_index() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_operation/test_cloud_delete_table_rows_show_data.groovy b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_delete_table_rows_show_data.groovy new file mode 100644 index 00000000000000..711d4804f624a4 --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_delete_table_rows_show_data.groovy @@ -0,0 +1,426 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_delete_table_rows_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def normal_table = { + tableName="test_cloud_delete_table_rows_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """delete from ${tableName} where L_ORDERKEY >=0;""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + def dynamic_partition_table = { + tableName="test_cloud_delete_table_rows_dynamic_partition_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + PARTITION BY RANGE(L_SHIPDATE) () + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """delete from ${tableName} where L_ORDERKEY >=0;""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + def auto_partition_table = { + tableName="test_cloud_delete_table_rows_auto_partition_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + AUTO PARTITION BY RANGE (date_trunc(`L_SHIPDATE`, 'month')) + ( + ) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """delete from ${tableName} where L_ORDERKEY >=0;""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + normal_table() + dynamic_partition_table() + auto_partition_table() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_operation/test_cloud_drop_partition_show_data.groovy b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_drop_partition_show_data.groovy new file mode 100644 index 00000000000000..f8062e089c8a06 --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_drop_partition_show_data.groovy @@ -0,0 +1,353 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_drop_partition_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def dynamic_partition_table = { + tableName="test_cloud_drop_partition_dynamic_partition_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + PARTITION BY RANGE(L_SHIPDATE) () + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """alter table ${tableName} drop partition xxx;""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + def auto_partition_table = { + tableName="test_cloud_drop_partition_auto_partition_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + AUTO PARTITION BY RANGE (date_trunc(`L_SHIPDATE`, 'month')) + ( + ) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """alter table ${tableName} drop partition xxx;""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + dynamic_partition_table() + auto_partition_table() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_operation/test_cloud_drop_table_show_data.groovy b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_drop_table_show_data.groovy new file mode 100644 index 00000000000000..9ef517301bec27 --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_drop_table_show_data.groovy @@ -0,0 +1,426 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_drop_table_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def normal_table = { + tableName="test_cloud_drop_table_normal_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """drop table ${tableName} force;""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + def dynamic_partition_table = { + tableName="test_cloud_drop_table_dynamic_partition_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + PARTITION BY RANGE(L_SHIPDATE) () + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """drop table ${tableName} force;""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + def auto_partition_table = { + tableName="test_cloud_drop_table_auto_partition_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + AUTO PARTITION BY RANGE (date_trunc(`L_SHIPDATE`, 'month')) + ( + ) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """drop table ${tableName} force;""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + normal_table() + dynamic_partition_table() + auto_partition_table() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_operation/test_cloud_recover_partition_show_data.groovy b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_recover_partition_show_data.groovy new file mode 100644 index 00000000000000..040e47a99e7fa4 --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_recover_partition_show_data.groovy @@ -0,0 +1,379 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_recover_partition_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def dynamic_partition_table = { + tableName="test_cloud_recover_partition_dynamic_partition_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + PARTITION BY RANGE(L_SHIPDATE) () + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """alter table ${tableName} drop partition xxx;""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """recover partition partition from ${tableName};""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + def auto_partition_table = { + tableName="test_cloud_recover_partition_auto_partition_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + AUTO PARTITION BY RANGE (date_trunc(`L_SHIPDATE`, 'month')) + ( + ) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """alter table ${tableName} drop partition xxx;""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """recover partition partition from ${tableName};""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + dynamic_partition_table() + auto_partition_table() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_operation/test_cloud_recover_table_show_data.groovy b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_recover_table_show_data.groovy new file mode 100644 index 00000000000000..d50862387a6651 --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_recover_table_show_data.groovy @@ -0,0 +1,465 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_recover_table_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def normal_table = { + tableName="test_cloud_recover_table_normal_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """drop table ${tableName} force;""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """recover table ${tableName};""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + def dynamic_partition_table = { + tableName="test_cloud_recover_table_dynamic_partition_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + PARTITION BY RANGE(L_SHIPDATE) () + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """drop table ${tableName} force;""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """recover table ${tableName};""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + def auto_partition_table = { + tableName="test_cloud_recover_table_auto_partition_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + AUTO PARTITION BY RANGE (date_trunc(`L_SHIPDATE`, 'month')) + ( + ) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """drop table ${tableName} force;""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """recover table ${tableName};""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + normal_table() + dynamic_partition_table() + auto_partition_table() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_operation/test_cloud_truncate_table_show_data.groovy b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_truncate_table_show_data.groovy new file mode 100644 index 00000000000000..72145e106aba08 --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_truncate_table_show_data.groovy @@ -0,0 +1,426 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_truncate_table_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def normal_table = { + tableName="test_cloud_truncate_table_normal_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """truncate table ${tableName} force;""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + def dynamic_partition_table = { + tableName="test_cloud_truncate_table_dynamic_partition_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + PARTITION BY RANGE(L_SHIPDATE) () + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """truncate table ${tableName} force;""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + def auto_partition_table = { + tableName="test_cloud_truncate_table_auto_partition_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + AUTO PARTITION BY RANGE (date_trunc(`L_SHIPDATE`, 'month')) + ( + ) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + + sql """truncate table ${tableName} force;""" + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + normal_table() + dynamic_partition_table() + auto_partition_table() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_property/test_cloud_disable_compaction_show_data.groovy b/regression-test/suites/show_data_p2/test_table_property/test_cloud_disable_compaction_show_data.groovy new file mode 100644 index 00000000000000..3de8e194f33f63 --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_property/test_cloud_disable_compaction_show_data.groovy @@ -0,0 +1,263 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_disable_compaction_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def main = { + tableName="test_cloud_disable_compaction_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + main() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_property/test_cloud_inverted_index_v1_show_data.groovy b/regression-test/suites/show_data_p2/test_table_property/test_cloud_inverted_index_v1_show_data.groovy new file mode 100644 index 00000000000000..33554360ccd0f2 --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_property/test_cloud_inverted_index_v1_show_data.groovy @@ -0,0 +1,267 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_inverted_index_v1_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def main = { + tableName="test_cloud_inverted_index_v1_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL, + index index_SHIPINSTRUCT (L_SHIPINSTRUCT) using inverted, + index index_SHIPMODE (L_SHIPMODE) using inverted, + index index_COMMENT (L_COMMENT) using inverted + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "inverted_index_storage_format" = "V1", + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + main() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_property/test_cloud_inverted_index_v2_show_data.groovy b/regression-test/suites/show_data_p2/test_table_property/test_cloud_inverted_index_v2_show_data.groovy new file mode 100644 index 00000000000000..4dd53c4ee5ecfb --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_property/test_cloud_inverted_index_v2_show_data.groovy @@ -0,0 +1,267 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_inverted_index_v2_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def main = { + tableName="test_cloud_inverted_index_v2_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL, + index index_SHIPINSTRUCT (L_SHIPINSTRUCT) using inverted, + index index_SHIPMODE (L_SHIPMODE) using inverted, + index index_COMMENT (L_COMMENT) using inverted + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "inverted_index_storage_format" = "V2", + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + main() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_property/test_cloud_lz4_show_data.groovy b/regression-test/suites/show_data_p2/test_table_property/test_cloud_lz4_show_data.groovy new file mode 100644 index 00000000000000..a81f37df39a7ee --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_property/test_cloud_lz4_show_data.groovy @@ -0,0 +1,264 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_dup_show_data_in_cloud","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def main = { + tableName="lineitem_dup" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + DUPLICATE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "compression" = "lz4", + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + main() +} diff --git a/regression-test/suites/show_data_p2/test_table_property/test_cloud_zstd_show_data.groovy b/regression-test/suites/show_data_p2/test_table_property/test_cloud_zstd_show_data.groovy new file mode 100644 index 00000000000000..5483f722668ac4 --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_property/test_cloud_zstd_show_data.groovy @@ -0,0 +1,264 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_lz4_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def main = { + tableName="test_cloud_lz4_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + DUPLICATE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "compression" = "zstd", + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + main() +} diff --git a/regression-test/suites/show_data_p2/test_table_type/test_cloud_agg_show_data.groovy b/regression-test/suites/show_data_p2/test_table_type/test_cloud_agg_show_data.groovy new file mode 100644 index 00000000000000..33973b7e4171ff --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_type/test_cloud_agg_show_data.groovy @@ -0,0 +1,263 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_agg_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def main = { + tableName="test_cloud_agg_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS lineitem_mow_agg + ( L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) SUM, + L_EXTENDEDPRICE DECIMAL(15,2) SUM, + L_DISCOUNT DECIMAL(15,2) SUM, + L_TAX DECIMAL(15,2) SUM, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT MAX, + L_COMMITDATE DATE NOT MAX, + L_RECEIPTDATE DATE NOT MAX, + L_SHIPINSTRUCT CHAR(25) REPLACE, + L_SHIPMODE CHAR(10) REPLACE, + L_COMMENT VARCHAR(44) REPLACE + ) + AGGREGATE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_RETURNFLAG, L_LINESTATUS) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + main() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_type/test_cloud_dup_show_data.groovy b/regression-test/suites/show_data_p2/test_table_type/test_cloud_dup_show_data.groovy new file mode 100644 index 00000000000000..bf642367a513d8 --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_type/test_cloud_dup_show_data.groovy @@ -0,0 +1,263 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_dup_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def main = { + tableName="test_cloud_dup_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + DUPLICATE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + main() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_type/test_cloud_mor_show_data.groovy b/regression-test/suites/show_data_p2/test_table_type/test_cloud_mor_show_data.groovy new file mode 100644 index 00000000000000..4623ffd8a98a8c --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_type/test_cloud_mor_show_data.groovy @@ -0,0 +1,264 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_mor_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def main = { + tableName="test_cloud_mor_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "enable_unique_key_merge_on_write" = "false", + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + main() +} \ No newline at end of file diff --git a/regression-test/suites/show_data_p2/test_table_type/test_cloud_mow_partial_update_show_data.groovy b/regression-test/suites/show_data_p2/test_table_type/test_cloud_mow_partial_update_show_data.groovy new file mode 100644 index 00000000000000..954fcb5e9017d8 --- /dev/null +++ b/regression-test/suites/show_data_p2/test_table_type/test_cloud_mow_partial_update_show_data.groovy @@ -0,0 +1,293 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds +// and modified by Doris. +import org.codehaus.groovy.runtime.IOGroovyMethods + +// loading one data 10 times, expect data size not rising +suite("test_cloud_mow_partial_update_show_data","p2") { + //cloud-mode + if (!isCloudMode()) { + logger.info("not cloud mode, not run") + return + } + + def repeate_stream_load_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def repeate_partial_update_same_data = { String tableName, int loadTimes -> + for (int i = 0; i < loadTimes; i++) { + streamLoad { + table tableName + set 'column_separator', '|' + set 'compress_type', 'GZ' + set 'columns', 'name, value' + set 'partial_columns', 'true' + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" + time 10000 // limit inflight 10s + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + def get_tablets_from_table = { String table -> + def res = sql_return_maparray """show tablets from ${table}""" + return res + } + + def show_tablet_compaction = { HashMap tablet -> + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET ") + sb.append(tablet["CompactionStatus"]) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + return parseJson(out.trim()) + } + + def trigger_tablet_compaction = { HashMap tablet, String compact_type -> + //support trigger base/cumulative/full compaction + def tabletStatusBeforeCompaction = show_tablet_compaction(tablet) + + String tabletInBe = tablet + String showCompactionStatus = tablet["CompactionStatus"] + String triggerCompactionUrl = showCompactionStatus.split("show")[0] + "run?tablet_id=" + tablet["TabletId"] + "&compact_type=" + compact_type + StringBuilder sb = new StringBuilder(); + sb.append("curl -X POST ") + sb.append(triggerCompactionUrl) + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream()))); + out = process.getText() + def outJson = parseJson(out) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + // if code = 0 means compaction happend, need to check + // other condition may indicate no suitable compaction condition + if ( code == 0 && outJson.status.toLowerCase() == "success" ){ + def compactionStatus = "RUNNING" + def tabletStatusAfterCompaction = null + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + tabletStatusAfterCompaction = show_tablet_compaction(tablet) + logger.info("tabletStatusAfterCompaction class: " + tabletStatusAfterCompaction.class) + logger.info("hhhhhh: " + tabletStatusAfterCompaction.toString()) + if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ + compactionStatus = 'FINISHED' + } + Thread.sleep(60 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + + if (status != "FINISHED") { + logger.info("compaction not Finish or failed") + return false + } + } + } + + def trigger_compaction = { List> tablets -> + for(def tablet: tablets) { + trigger_tablet_compaction(tablet, "cumulative") + trigger_tablet_compaction(tablet, "base") + trigger_tablet_compaction(tablet, "full") + } + } + + def caculate_table_data_size_in_backend_storage = { List> tablets -> + storageType = context.config.otherConfigs.get("storageProvider") + Double storageSize = 0 + + List tabletIds = [] + for(def tablet: tablets) { + tabletIds.add(tablet["TabletId"]) + } + + if (storageType.toLowerCase() == "oss") { + //cbs means cluster backend storage + ak = context.config.otherConfigs.get("cbsS3Ak") + sk = context.config.otherConfigs.get("cbsS3Sk") + endpoint = context.config.otherConfigs.get("cbsS3Endpoint") + bucketName = context.config.otherConfigs.get("cbsS3Bucket") + storagePrefix = context.config.otherConfigs.get("cbsS3Prefix") + + client = initOssClient(ak, sk, endpoint) + for(String tabletId: tabletIds) { + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + } + shutDownOssClient(client) + } + + if (storageType.toLowerCase() == "hdfs") { + fsName = context.config.otherConfigs.get("cbsFsName") + isKerberosFs = context.config.otherConfigs.get("cbsFsKerberos") + fsUser = context.config.otherConfigs.get("cbsFsUser") + storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") + } + + return storageSize + } + + def translate_different_unit_to_MB = { String size, String unitField -> + Double sizeKb = 0.0 + if (unitField == "KB") { + sizeKb = Double.parseDouble(size) / 1024 + } else if (unitField == "MB") { + sizeKb = Double.parseDouble(size) + } else if (unitField == "GB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 + } else if (unitField == "TB") { + sizeKb = Double.parseDouble(size) * 1024 * 1024 * 1024 + } + return sizeKb + } + + def show_table_data_size_through_mysql = { String table -> + def mysqlShowDataSize = 0L + def res = sql_return_maparray " show data from ${table}" + def tableSizeInfo = res[0] + def fields = tableSizeInfo["Size"].split(" ") + if (fields.length == 2 ){ + def sizeField = fields[0] + def unitField = fields[1] + mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) + } + return mysqlShowDataSize + } + + def caculate_table_data_size_through_api = { List> tablets -> + Double apiCaculateSize = 0 + for (HashMap tablet in tablets) { + def tabletStatus = show_tablet_compaction(tablet) + + for(String rowset: tabletStatus.rowsets){ + def fields = rowset.split(" ") + if (fields.length == 7) { + def sizeField = fields[-2] // the last field(size) + def unitField = fields[-1] // The second to last field(unit) + // 转换成 KB + apiCaculateSize += translate_different_unit_to_MB(sizeField, unitField ) + } + } + } + + return apiCaculateSize + } + + def main = { + tableName="test_cloud_mow_partial_update_show_data" + sql "DROP TABLE IF EXISTS ${tableName};" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName}( + L_ORDERKEY INTEGER NOT NULL, + L_PARTKEY INTEGER NOT NULL, + L_SUPPKEY INTEGER NOT NULL, + L_LINENUMBER INTEGER NOT NULL, + L_QUANTITY DECIMAL(15,2) NOT NULL, + L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, + L_DISCOUNT DECIMAL(15,2) NOT NULL, + L_TAX DECIMAL(15,2) NOT NULL, + L_RETURNFLAG CHAR(1) NOT NULL, + L_LINESTATUS CHAR(1) NOT NULL, + L_SHIPDATE DATE NOT NULL, + L_COMMITDATE DATE NOT NULL, + L_RECEIPTDATE DATE NOT NULL, + L_SHIPINSTRUCT CHAR(25) NOT NULL, + L_SHIPMODE CHAR(10) NOT NULL, + L_COMMENT VARCHAR(44) NOT NULL + ) + UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + List tablets = get_tablets_from_table(tableName) + def loadTimes = [1, 10] + Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] + for (int i in loadTimes){ + // stream load 1 time, record each size + repeate_stream_load_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + repeate_partial_update_same_data(tableName, i) + def rows = sql_return_maparray "select count(*) as count from ${tableName};" + logger.info("after partial update, table ${tableName} has ${rows[0]["count"]} rows") + // 加一下触发compaction的机制 + trigger_compaction(tablets) + + // 然后 sleep 5min, 等fe汇报完 + sleep(300 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + sleep(300 * 1000) + logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") + + } + + // expect mysqlSize == apiSize == storageSize + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) + // expect load 1 times == load 10 times + assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) + assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) + assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) + } + + main() +} diff --git a/regression-test/suites/show_data_p2/test_show_mow_data.groovy b/regression-test/suites/show_data_p2/test_table_type/test_cloud_mow_show_data.groovy similarity index 98% rename from regression-test/suites/show_data_p2/test_show_mow_data.groovy rename to regression-test/suites/show_data_p2/test_table_type/test_cloud_mow_show_data.groovy index a5d25bb6c3b649..d8ab20d68fac11 100644 --- a/regression-test/suites/show_data_p2/test_show_mow_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_type/test_cloud_mow_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_mow_show_data_in_cloud","p2") { +suite("test_cloud_mow_show_data","p2") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -34,7 +34,7 @@ suite("test_mow_show_data_in_cloud","p2") { table tableName set 'column_separator', '|' set 'compress_type', 'GZ' - file """${getS3Url()}/regression/tpch/sf1/lineitem.csv.split00.gz""" + file """${getS3Url()}/regression/tpch/sf0.1/lineitem.tbl.gz""" time 10000 // limit inflight 10s check { result, exception, startTime, endTime -> if (exception != null) { @@ -201,7 +201,7 @@ suite("test_mow_show_data_in_cloud","p2") { } def main = { - tableName="lineitem_mow" + tableName="test_cloud_mow_show_data" sql "DROP TABLE IF EXISTS ${tableName};" sql """ CREATE TABLE IF NOT EXISTS ${tableName}(