Skip to content

Commit

Permalink
[Refactor](executor)Add information_schema.workload_groups (apache#32195
Browse files Browse the repository at this point in the history
)
  • Loading branch information
wangbo authored Mar 14, 2024
1 parent 2e3d35f commit ace7792
Show file tree
Hide file tree
Showing 24 changed files with 330 additions and 278 deletions.
3 changes: 3 additions & 0 deletions be/src/exec/schema_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include "exec/schema_scanner/schema_user_privileges_scanner.h"
#include "exec/schema_scanner/schema_variables_scanner.h"
#include "exec/schema_scanner/schema_views_scanner.h"
#include "exec/schema_scanner/schema_workload_groups_scanner.h"
#include "olap/hll.h"
#include "runtime/define_primitive_type.h"
#include "util/string_util.h"
Expand Down Expand Up @@ -155,6 +156,8 @@ std::unique_ptr<SchemaScanner> SchemaScanner::create(TSchemaTableType::type type
return SchemaBackendActiveTasksScanner::create_unique();
case TSchemaTableType::SCH_ACTIVE_QUERIES:
return SchemaActiveQueriesScanner::create_unique();
case TSchemaTableType::SCH_WORKLOAD_GROUPS:
return SchemaWorkloadGroupsScanner::create_unique();
default:
return SchemaDummyScanner::create_unique();
break;
Expand Down
17 changes: 7 additions & 10 deletions be/src/exec/schema_scanner/schema_active_queries_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,20 +50,17 @@ Status SchemaActiveQueriesScanner::start(RuntimeState* state) {
Status SchemaActiveQueriesScanner::_get_active_queries_block_from_fe() {
TNetworkAddress master_addr = ExecEnv::GetInstance()->master_info()->network_address;

TQueriesMetadataParams tqueries_meta_params;
tqueries_meta_params.__set_relay_to_other_fe(true);

TMetadataTableRequestParams metadata_table_params;
metadata_table_params.__set_metadata_type(TMetadataType::QUERIES);
metadata_table_params.__set_queries_metadata_params(tqueries_meta_params);
TSchemaTableRequestParams schema_table_params;
for (int i = 0; i < _s_tbls_columns.size(); i++) {
metadata_table_params.__isset.columns_name = true;
metadata_table_params.columns_name.emplace_back(_s_tbls_columns[i].name);
schema_table_params.__isset.columns_name = true;
schema_table_params.columns_name.emplace_back(_s_tbls_columns[i].name);
}
schema_table_params.replay_to_other_fe = true;
schema_table_params.__isset.replay_to_other_fe = true;

TFetchSchemaTableDataRequest request;
request.__set_schema_table_name(TSchemaTableName::SCHEMA_TABLE);
request.__set_metada_table_params(metadata_table_params);
request.__set_schema_table_name(TSchemaTableName::ACTIVE_QUERIES);
request.__set_schema_table_params(schema_table_params);

TFetchSchemaTableDataResult result;

Expand Down
166 changes: 166 additions & 0 deletions be/src/exec/schema_scanner/schema_workload_groups_scanner.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "exec/schema_scanner/schema_workload_groups_scanner.h"

#include "runtime/client_cache.h"
#include "runtime/exec_env.h"
#include "runtime/runtime_state.h"
#include "util/thrift_rpc_helper.h"
#include "vec/common/string_ref.h"
#include "vec/core/block.h"
#include "vec/data_types/data_type_factory.hpp"

namespace doris {
// Column layout of information_schema.workload_groups as seen by this scanner.
//
// This list drives three things in _get_workload_groups_block_from_fe():
//   * every name below is sent to the FE as a requested column name;
//   * one nullable column is created per entry, using the entry's type;
//   * for each returned row, TYPE_BIGINT entries are read from the cell's
//     longVal and every other entry (TYPE_VARCHAR / TYPE_STRING) from stringVal.
// The number of cells in a returned row is compared against the size of this
// list, so the order and count here have to agree with what the FE produces.
//
// NOTE(review): the initializer fields look like {name, type, size, nullable};
// confirm against the SchemaScanner::ColumnDesc declaration.
std::vector<SchemaScanner::ColumnDesc> SchemaWorkloadGroupsScanner::_s_tbls_columns = {
{"ID", TYPE_BIGINT, sizeof(int64_t), true},
{"NAME", TYPE_VARCHAR, sizeof(StringRef), true},
{"CPU_SHARE", TYPE_BIGINT, sizeof(int64_t), true},
{"MEMORY_LIMIT", TYPE_VARCHAR, sizeof(StringRef), true},
{"ENABLE_MEMORY_OVERCOMMIT", TYPE_VARCHAR, sizeof(StringRef), true},
{"MAX_CONCURRENCY", TYPE_BIGINT, sizeof(int64_t), true},
{"MAX_QUEUE_SIZE", TYPE_BIGINT, sizeof(int64_t), true},
{"QUEUE_TIMEOUT", TYPE_BIGINT, sizeof(int64_t), true},
{"CPU_HARD_LIMIT", TYPE_STRING, sizeof(StringRef), true},
{"SCAN_THREAD_NUM", TYPE_BIGINT, sizeof(int64_t), true},
{"MAX_REMOTE_SCAN_THREAD_NUM", TYPE_BIGINT, sizeof(int64_t), true},
{"MIN_REMOTE_SCAN_THREAD_NUM", TYPE_BIGINT, sizeof(int64_t), true}};

// Registers this scanner with the base class using the static column list and
// the SCH_WORKLOAD_GROUPS schema table type.
SchemaWorkloadGroupsScanner::SchemaWorkloadGroupsScanner()
        : SchemaScanner(_s_tbls_columns, TSchemaTableType::SCH_WORKLOAD_GROUPS) {
}

// Nothing to release by hand: the cached block is held in a std::unique_ptr.
SchemaWorkloadGroupsScanner::~SchemaWorkloadGroupsScanner() = default;

// Captures the per-query settings this scanner needs before any block is
// produced: the RPC timeout used when calling the FE and the maximum number of
// rows handed out per get_next_block() call.
Status SchemaWorkloadGroupsScanner::start(RuntimeState* state) {
    // NOTE(review): the factor 1000 presumably converts seconds to
    // milliseconds - confirm the units of RuntimeState::execution_timeout().
    const auto execution_timeout = state->execution_timeout();
    _rpc_timeout = execution_timeout * 1000;

    _block_rows_limit = state->batch_size();
    return Status::OK();
}

// Fetches all workload group rows from the master FE with a single
// fetchSchemaTableData RPC and materializes them into _workload_groups_block.
//
// The request carries the names from _s_tbls_columns and the current user
// identity. Each returned row must contain exactly one cell per entry of
// _s_tbls_columns; TYPE_BIGINT columns are filled from the cell's longVal, all
// other columns from its stringVal. Every value is inserted as non-null.
//
// Returns:
//   * the RPC error if the call itself fails;
//   * the status reported by the FE if it is not OK;
//   * InternalError if the user identity is missing or if any row has an
//     unexpected number of cells;
//   * OK otherwise.
//
// _workload_groups_block is only assigned on success, so a failed call never
// leaves behind a partially built block that get_next_block() could mistake
// for a completed fetch.
Status SchemaWorkloadGroupsScanner::_get_workload_groups_block_from_fe() {
    const TNetworkAddress master_addr = ExecEnv::GetInstance()->master_info()->network_address;
    const size_t num_columns = _s_tbls_columns.size();

    // The identity is dereferenced below; fail with a Status instead of
    // dereferencing a null pointer when the plan did not provide one.
    if (!_param->common_param->current_user_ident) {
        return Status::InternalError<false>(
                "current user identity is not set for workload groups schema scan");
    }

    TSchemaTableRequestParams schema_table_request_params;
    for (const auto& column : _s_tbls_columns) {
        schema_table_request_params.__isset.columns_name = true;
        schema_table_request_params.columns_name.emplace_back(column.name);
    }
    schema_table_request_params.__set_current_user_ident(*_param->common_param->current_user_ident);

    TFetchSchemaTableDataRequest request;
    request.__set_schema_table_name(TSchemaTableName::WORKLOAD_GROUPS);
    request.__set_schema_table_params(schema_table_request_params);

    TFetchSchemaTableDataResult result;

    RETURN_IF_ERROR(ThriftRpcHelper::rpc<FrontendServiceClient>(
            master_addr.hostname, master_addr.port,
            [&request, &result](FrontendServiceConnection& client) {
                client->fetchSchemaTableData(result, request);
            },
            _rpc_timeout));

    Status status(Status::create(result.status));
    if (!status.ok()) {
        LOG(WARNING) << "fetch workload groups from FE failed, errmsg=" << status;
        return status;
    }

    // Borrow the rows from the RPC result instead of copying the whole batch.
    const std::vector<TRow>& rows = result.data_batch;

    // Validate every row before touching any column: the fill loop below
    // indexes column_value without bounds checking, so a single short row
    // would otherwise be read out of bounds.
    for (const auto& row : rows) {
        if (row.column_value.size() != num_columns) {
            return Status::InternalError<false>(
                    "workload groups schema is not match for FE and BE");
        }
    }

    // Build into a local block and publish it only once it is complete.
    auto block = vectorized::Block::create_unique();
    for (const auto& column : _s_tbls_columns) {
        TypeDescriptor descriptor(column.type);
        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(descriptor, true);
        block->insert(vectorized::ColumnWithTypeAndName(data_type->create_column(), data_type,
                                                        column.name));
    }
    block->reserve(rows.size());

    // todo(wb) reuse this callback function
    // Appends one non-null string to the nullable string column at col_index.
    auto insert_string_value = [](size_t col_index, const std::string& str_val,
                                  vectorized::Block* target) {
        vectorized::MutableColumnPtr mutable_col_ptr =
                std::move(*target->get_by_position(col_index).column).assume_mutable();
        auto* nullable_column =
                reinterpret_cast<vectorized::ColumnNullable*>(mutable_col_ptr.get());
        vectorized::IColumn* col_ptr = &nullable_column->get_nested_column();
        reinterpret_cast<vectorized::ColumnString*>(col_ptr)->insert_data(str_val.data(),
                                                                          str_val.size());
        nullable_column->get_null_map_data().emplace_back(0);
    };
    // Appends one non-null 64-bit integer to the nullable column at col_index.
    auto insert_int_value = [](size_t col_index, int64_t int_val, vectorized::Block* target) {
        vectorized::MutableColumnPtr mutable_col_ptr =
                std::move(*target->get_by_position(col_index).column).assume_mutable();
        auto* nullable_column =
                reinterpret_cast<vectorized::ColumnNullable*>(mutable_col_ptr.get());
        vectorized::IColumn* col_ptr = &nullable_column->get_nested_column();
        reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_value(
                int_val);
        nullable_column->get_null_map_data().emplace_back(0);
    };

    for (const auto& row : rows) {
        for (size_t j = 0; j < num_columns; ++j) {
            const auto& cell = row.column_value[j];
            if (_s_tbls_columns[j].type == TYPE_BIGINT) {
                insert_int_value(j, cell.longVal, block.get());
            } else {
                insert_string_value(j, cell.stringVal, block.get());
            }
        }
    }

    _workload_groups_block = std::move(block);
    return Status::OK();
}

// Copies the next batch of cached workload group rows into `block`.
//
// On the first call the rows are fetched from the FE and cached; later calls
// only slice the cache. At most _block_rows_limit rows are appended per call.
// `*eos` is set to true once every cached row has been handed out (including
// the case where there are no rows at all).
//
// Returns InternalError if the scanner has not been initialized or if either
// pointer argument is null, and propagates any error from the FE fetch.
Status SchemaWorkloadGroupsScanner::get_next_block(vectorized::Block* block, bool* eos) {
    if (!_is_init) {
        return Status::InternalError("Used before initialized.");
    }
    if (block == nullptr || eos == nullptr) {
        return Status::InternalError("input pointer is nullptr.");
    }

    // Lazily populate the cache the first time a block is requested.
    if (!_workload_groups_block) {
        RETURN_IF_ERROR(_get_workload_groups_block_from_fe());
        _total_rows = _workload_groups_block->rows();
    }

    const int remaining_rows = _total_rows - _row_idx;
    if (remaining_rows == 0) {
        *eos = true;
        return Status::OK();
    }

    const int rows_to_copy = std::min(_block_rows_limit, remaining_rows);
    vectorized::MutableBlock output = vectorized::MutableBlock::build_mutable_block(block);
    output.add_rows(_workload_groups_block.get(), _row_idx, rows_to_copy);
    _row_idx += rows_to_copy;

    *eos = (_row_idx == _total_rows);
    return Status::OK();
}

} // namespace doris
52 changes: 52 additions & 0 deletions be/src/exec/schema_scanner/schema_workload_groups_scanner.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <vector>

#include "common/status.h"
#include "exec/schema_scanner.h"

namespace doris {
class RuntimeState;
namespace vectorized {
class Block;
} // namespace vectorized

// Schema scanner backing the information_schema.workload_groups table.
//
// The rows are not produced locally: on the first get_next_block() call the
// scanner asks the master FE for all workload groups through one RPC, caches
// the result in a block, and then hands that cache out in batches.
class SchemaWorkloadGroupsScanner : public SchemaScanner {
// NOTE(review): presumably supplies the create_unique() factory that
// SchemaScanner::create() calls for SCH_WORKLOAD_GROUPS - confirm against the
// macro definition.
ENABLE_FACTORY_CREATOR(SchemaWorkloadGroupsScanner);

public:
SchemaWorkloadGroupsScanner();
~SchemaWorkloadGroupsScanner() override;

// Reads the batch size and execution timeout from `state`; always returns OK.
Status start(RuntimeState* state) override;
// Appends the next batch of cached rows to `block` and sets `*eos` once all
// rows have been returned. Fetches from the FE on the first call.
Status get_next_block(vectorized::Block* block, bool* eos) override;

// Column names and types of the table; also sent to the FE as the list of
// requested columns.
static std::vector<SchemaScanner::ColumnDesc> _s_tbls_columns;

private:
// Performs the FE RPC and fills _workload_groups_block.
Status _get_workload_groups_block_from_fe();

// Maximum rows appended per get_next_block() call; start() replaces the
// default with the runtime state's batch size.
int _block_rows_limit = 4096;
// Index of the next cached row to hand out.
int _row_idx = 0;
// Number of rows in the cached block; set right after the FE fetch.
int _total_rows = 0;
// Cached FE result; stays null until the first get_next_block() call.
std::unique_ptr<vectorized::Block> _workload_groups_block = nullptr;
// Timeout passed to the FE RPC; start() replaces the default with the
// execution timeout multiplied by 1000.
// NOTE(review): presumably milliseconds - confirm against ThriftRpcHelper::rpc.
int _rpc_timeout = 3000;
};
}; // namespace doris
42 changes: 0 additions & 42 deletions be/src/vec/exec/scan/vmeta_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,9 +235,6 @@ Status VMetaScanner::_fetch_metadata(const TMetaScanRange& meta_scan_range) {
case TMetadataType::FRONTENDS_DISKS:
RETURN_IF_ERROR(_build_frontends_disks_metadata_request(meta_scan_range, &request));
break;
case TMetadataType::WORKLOAD_GROUPS:
RETURN_IF_ERROR(_build_workload_groups_metadata_request(meta_scan_range, &request));
break;
case TMetadataType::WORKLOAD_SCHED_POLICY:
RETURN_IF_ERROR(_build_workload_sched_policy_metadata_request(meta_scan_range, &request));
break;
Expand All @@ -253,9 +250,6 @@ Status VMetaScanner::_fetch_metadata(const TMetaScanRange& meta_scan_range) {
case TMetadataType::TASKS:
RETURN_IF_ERROR(_build_tasks_metadata_request(meta_scan_range, &request));
break;
case TMetadataType::QUERIES:
RETURN_IF_ERROR(_build_queries_metadata_request(meta_scan_range, &request));
break;
default:
_meta_eos = true;
return Status::OK();
Expand Down Expand Up @@ -365,23 +359,6 @@ Status VMetaScanner::_build_frontends_disks_metadata_request(
return Status::OK();
}

// Fills `request` so that the FE returns the WORKLOAD_GROUPS metadata table for
// the scanner's user identity. `meta_scan_range` is not consulted.
// Always returns OK.
Status VMetaScanner::_build_workload_groups_metadata_request(
        const TMetaScanRange& meta_scan_range, TFetchSchemaTableDataRequest* request) {
    VLOG_CRITICAL << "VMetaScanner::_build_workload_groups_metadata_request";

    // Describe which metadata table is wanted and on whose behalf.
    TMetadataTableRequestParams params;
    params.__set_metadata_type(TMetadataType::WORKLOAD_GROUPS);
    params.__set_current_user_ident(_user_identity);

    // Assemble the outgoing request around those parameters.
    request->__set_cluster_name("");
    request->__set_schema_table_name(TSchemaTableName::METADATA_TABLE);
    request->__set_metada_table_params(params);

    return Status::OK();
}

Status VMetaScanner::_build_workload_sched_policy_metadata_request(
const TMetaScanRange& meta_scan_range, TFetchSchemaTableDataRequest* request) {
VLOG_CRITICAL << "VMetaScanner::_build_workload_sched_policy_metadata_request";
Expand Down Expand Up @@ -473,25 +450,6 @@ Status VMetaScanner::_build_tasks_metadata_request(const TMetaScanRange& meta_sc
return Status::OK();
}

// Fills `request` so that the FE returns the QUERIES metadata table, forwarding
// the queries parameters carried by `meta_scan_range`.
// Returns InternalError when the scan range has no queries parameters set,
// OK otherwise.
Status VMetaScanner::_build_queries_metadata_request(const TMetaScanRange& meta_scan_range,
                                                     TFetchSchemaTableDataRequest* request) {
    VLOG_CRITICAL << "VMetaScanner::_build_queries_metadata_request";

    const bool has_queries_params = meta_scan_range.__isset.queries_params;
    if (!has_queries_params) {
        return Status::InternalError("Can not find TQueriesMetadataParams from meta_scan_range.");
    }

    // Describe which metadata table is wanted, with the caller's parameters.
    TMetadataTableRequestParams params;
    params.__set_metadata_type(TMetadataType::QUERIES);
    params.__set_queries_metadata_params(meta_scan_range.queries_params);

    // Assemble the outgoing request around those parameters.
    request->__set_cluster_name("");
    request->__set_schema_table_name(TSchemaTableName::METADATA_TABLE);
    request->__set_metada_table_params(params);

    return Status::OK();
}

Status VMetaScanner::close(RuntimeState* state) {
VLOG_CRITICAL << "VMetaScanner::close";
RETURN_IF_ERROR(VScanner::close(state));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ public enum SchemaTableType {
SCH_METADATA_NAME_IDS("METADATA_NAME_IDS", "METADATA_NAME_IDS", TSchemaTableType.SCH_METADATA_NAME_IDS),
SCH_PROFILING("PROFILING", "PROFILING", TSchemaTableType.SCH_PROFILING),
SCH_BACKEND_ACTIVE_TASKS("BACKEND_ACTIVE_TASKS", "BACKEND_ACTIVE_TASKS", TSchemaTableType.SCH_BACKEND_ACTIVE_TASKS),
SCH_ACTIVE_QUERIES("ACTIVE_QUERIES", "ACTIVE_QUERIES", TSchemaTableType.SCH_ACTIVE_QUERIES);
SCH_ACTIVE_QUERIES("ACTIVE_QUERIES", "ACTIVE_QUERIES", TSchemaTableType.SCH_ACTIVE_QUERIES),
SCH_WORKLOAD_GROUPS("WORKLOAD_GROUPS", "WORKLOAD_GROUPS", TSchemaTableType.SCH_WORKLOAD_GROUPS);
private static final String dbName = "INFORMATION_SCHEMA";
private static SelectList fullSelectLists;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import org.apache.doris.nereids.trees.expressions.functions.table.Numbers;
import org.apache.doris.nereids.trees.expressions.functions.table.S3;
import org.apache.doris.nereids.trees.expressions.functions.table.Tasks;
import org.apache.doris.nereids.trees.expressions.functions.table.WorkloadGroups;

import com.google.common.collect.ImmutableList;

Expand All @@ -56,8 +55,7 @@ public class BuiltinTableValuedFunctions implements FunctionHelper {
tableValued(S3.class, "s3"),
tableValued(MvInfos.class, "mv_infos"),
tableValued(Jobs.class, "jobs"),
tableValued(Tasks.class, "tasks"),
tableValued(WorkloadGroups.class, "workload_groups")
tableValued(Tasks.class, "tasks")
);

public static final BuiltinTableValuedFunctions INSTANCE = new BuiltinTableValuedFunctions();
Expand Down
14 changes: 14 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/SchemaTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,20 @@ public class SchemaTable extends Table {
.column("FRONTEND_INSTANCE", ScalarType.createVarchar(256))
.column("SQL", ScalarType.createStringType())
.build()))
.put("workload_groups", new SchemaTable(SystemIdGenerator.getNextId(), "workload_groups", TableType.SCHEMA,
builder().column("ID", ScalarType.createType(PrimitiveType.BIGINT))
.column("NAME", ScalarType.createVarchar(256))
.column("CPU_SHARE", ScalarType.createType(PrimitiveType.BIGINT))
.column("MEMORY_LIMIT", ScalarType.createVarchar(256))
.column("ENABLE_MEMORY_OVERCOMMIT", ScalarType.createVarchar(256))
.column("MAX_CONCURRENCY", ScalarType.createType(PrimitiveType.BIGINT))
.column("MAX_QUEUE_SIZE", ScalarType.createType(PrimitiveType.BIGINT))
.column("QUEUE_TIMEOUT", ScalarType.createType(PrimitiveType.BIGINT))
.column("CPU_HARD_LIMIT", ScalarType.createStringType())
.column("SCAN_THREAD_NUM", ScalarType.createType(PrimitiveType.BIGINT))
.column("MAX_REMOTE_SCAN_THREAD_NUM", ScalarType.createType(PrimitiveType.BIGINT))
.column("MIN_REMOTE_SCAN_THREAD_NUM", ScalarType.createType(PrimitiveType.BIGINT))
.build()))
.build();

protected SchemaTable(long id, String name, TableType type, List<Column> baseSchema) {
Expand Down
Loading

0 comments on commit ace7792

Please sign in to comment.