Skip to content

Commit

Permalink
feature: bamboo-engine metrics add hostname label (#34)
Browse files Browse the repository at this point in the history
* feature: bamboo-engine metrics add hostname label

* minor: flake8 fix
  • Loading branch information
homholueng authored Dec 6, 2021
1 parent 0722af2 commit d190569
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 26 deletions.
2 changes: 1 addition & 1 deletion bamboo_engine/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@
specific language governing permissions and limitations under the License.
"""

__version__ = "1.5.2"
__version__ = "1.6.0"
10 changes: 8 additions & 2 deletions bamboo_engine/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
Node,
)
from .utils.string import get_lower_case_name
from .utils.host import get_hostname

logger = logging.getLogger("bamboo_engine")

Expand All @@ -59,6 +60,7 @@ class Engine:

def __init__(self, runtime: EngineRuntimeInterface):
self.runtime = runtime
self._hostname = get_hostname()

# api
def run_pipeline(
Expand Down Expand Up @@ -738,7 +740,9 @@ def execute(self, process_id: int, node_id: str, root_pipeline_id: str, parent_p
type_label = self._get_metrics_node_type(node)
execute_start = time.time()
execute_result = handler.execute(process_info, loop, inner_loop, version)
ENGINE_NODE_EXECUTE_TIME.labels(type_label).observe(time.time() - execute_start)
ENGINE_NODE_EXECUTE_TIME.labels(type=type_label, hostname=self._hostname).observe(
time.time() - execute_start
)

# 进程是否要进入睡眠
if execute_result.should_sleep:
Expand Down Expand Up @@ -945,7 +949,9 @@ def schedule(
)
schedule_start = time.time()
schedule_result = handler.schedule(process_info, state.loop, state.inner_loop, schedule, callback_data)
ENGINE_NODE_SCHEDULE_TIME.labels(type_label).observe(time.time() - schedule_start)
ENGINE_NODE_SCHEDULE_TIME.labels(type=type_label, hostname=self._hostname).observe(
time.time() - schedule_start
)

if schedule_result.has_next_schedule:
self.runtime.set_next_schedule(
Expand Down
66 changes: 44 additions & 22 deletions bamboo_engine/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,14 @@

import os
import time

from functools import wraps

from prometheus_client import Gauge, Histogram

from .utils.host import get_hostname

HOST_NAME = get_hostname()


def decode_buckets(buckets_list):
    """
    Parse a comma-separated string of histogram bucket boundaries.

    Blank entries (for example the one produced by a trailing comma, or an
    entirely empty string) are skipped instead of being passed to ``float``,
    which would raise on an empty string.

    :param buckets_list: comma-separated numbers, e.g. ``"0.1,0.5,1"``
    :return: list of floats, in the order they appear in the string
    :raises ValueError: if a non-blank entry cannot be converted to float
    """
    # float() already tolerates surrounding whitespace, so only blank
    # entries need to be filtered out here.
    return [float(entry) for entry in buckets_list.split(",") if entry.strip()]
Expand Down Expand Up @@ -57,12 +60,12 @@ def wrapper(func):
@wraps(func)
def _wrapper(*args, **kwargs):
for g in gauges:
g.inc(1)
g.labels(hostname=HOST_NAME).inc(1)
try:
return func(*args, **kwargs)
finally:
for g in gauges:
g.dec(1)
g.labels(hostname=HOST_NAME).dec(1)

return _wrapper

Expand All @@ -78,95 +81,114 @@ def _wrapper(*args, **kwargs):
return func(*args, **kwargs)
finally:
for h in histograms:
h.observe(time.time() - start)
h.labels(hostname=HOST_NAME).observe(time.time() - start)

return _wrapper

return wrapper


# engine metrics
ENGINE_RUNNING_PROCESSES = Gauge("engine_running_processes", "count running state processes")
ENGINE_RUNNING_SCHEDULES = Gauge("engine_running_schedules", "count running state schedules")
ENGINE_RUNNING_PROCESSES = Gauge("engine_running_processes", "count running state processes", labelnames=["hostname"])
ENGINE_RUNNING_SCHEDULES = Gauge("engine_running_schedules", "count running state schedules", labelnames=["hostname"])
ENGINE_PROCESS_RUNNING_TIME = Histogram(
"engine_process_running_time",
"time spent running process",
buckets=get_histogram_buckets_from_evn("ENGINE_PROCESS_RUNNING_TIME_BUCKETS"),
labelnames=["hostname"],
)
ENGINE_SCHEDULE_RUNNING_TIME = Histogram(
"engine_schedule_running_time",
"time spent running schedule",
buckets=get_histogram_buckets_from_evn("ENGINE_SCHEDULE_RUNNING_TIME_BUCKETS"),
labelnames=["hostname"],
)
ENGINE_NODE_EXECUTE_TIME = Histogram(
"engine_node_execute_time",
"time spent executing node",
buckets=get_histogram_buckets_from_evn("ENGINE_NODE_EXECUTE_TIME_BUCKETS"),
labelnames=["type"],
labelnames=["type", "hostname"],
)
ENGINE_NODE_SCHEDULE_TIME = Histogram(
"engine_node_schedule_time",
"time spent scheduling node",
buckets=get_histogram_buckets_from_evn("ENGINE_NODE_SCHEDULE_TIME_BUCKETS"),
labelnames=["type"],
labelnames=["type", "hostname"],
)

# runtime metrics
ENGINE_RUNTIME_CONTEXT_VALUE_READ_TIME = Histogram(
"engine_runtime_context_value_read_time", "time spent reading context value"
"engine_runtime_context_value_read_time", "time spent reading context value", labelnames=["hostname"]
)
ENGINE_RUNTIME_CONTEXT_REF_READ_TIME = Histogram(
"engine_runtime_context_ref_read_time", "time spent reading context value reference"
"engine_runtime_context_ref_read_time", "time spent reading context value reference", labelnames=["hostname"]
)
ENGINE_RUNTIME_CONTEXT_VALUE_UPSERT_TIME = Histogram(
"engine_runtime_context_value_upsert_time", "time spent upserting context value"
"engine_runtime_context_value_upsert_time", "time spent upserting context value", labelnames=["hostname"]
)

ENGINE_RUNTIME_DATA_INPUTS_READ_TIME = Histogram(
"engine_runtime_data_inputs_read_time", "time spent reading node data inputs"
"engine_runtime_data_inputs_read_time", "time spent reading node data inputs", labelnames=["hostname"]
)
ENGINE_RUNTIME_DATA_OUTPUTS_READ_TIME = Histogram(
"engine_runtime_data_outputs_read_time", "time spent reading node data outputs"
"engine_runtime_data_outputs_read_time", "time spent reading node data outputs", labelnames=["hostname"]
)
ENGINE_RUNTIME_DATA_READ_TIME = Histogram(
"engine_runtime_data_read_time", "time spent reading node data inputs and outputs"
"engine_runtime_data_read_time", "time spent reading node data inputs and outputs", labelnames=["hostname"]
)

ENGINE_RUNTIME_EXEC_DATA_INPUTS_READ_TIME = Histogram(
"engine_runtime_exec_data_inputs_read_time",
"time spent reading node execution data inputs",
labelnames=["hostname"],
)
ENGINE_RUNTIME_EXEC_DATA_OUTPUTS_READ_TIME = Histogram(
"engine_runtime_exec_data_outputs_read_time",
"time spent reading node execution data outputs",
labelnames=["hostname"],
)
ENGINE_RUNTIME_EXEC_DATA_READ_TIME = Histogram(
"engine_runtime_exec_data_read_time",
"time spent reading node execution data inputs and outputs",
labelnames=["hostname"],
)
ENGINE_RUNTIME_EXEC_DATA_INPUTS_WRITE_TIME = Histogram(
"engine_runtime_exec_data_inputs_write_time",
"time spent writing node execution data inputs",
labelnames=["hostname"],
)
ENGINE_RUNTIME_EXEC_DATA_OUTPUTS_WRITE_TIME = Histogram(
"engine_runtime_exec_data_outputs_write_time",
"time spent writing node execution data outputs",
labelnames=["hostname"],
)
ENGINE_RUNTIME_EXEC_DATA_WRITE_TIME = Histogram(
"engine_runtime_exec_data_write_time",
"time spent writing node execution data inputs and outputs",
labelnames=["hostname"],
)
ENGINE_RUNTIME_CALLBACK_DATA_READ_TIME = Histogram(
"engine_runtime_callback_data_read_time",
"time spent reading node callback data",
"engine_runtime_callback_data_read_time", "time spent reading node callback data", labelnames=["hostname"]
)

ENGINE_RUNTIME_SCHEDULE_READ_TIME = Histogram("engine_runtime_schedule_read_time", "time spent reading schedule")
ENGINE_RUNTIME_SCHEDULE_WRITE_TIME = Histogram("engine_runtime_schedule_write_time", "time spent writing schedule")
ENGINE_RUNTIME_SCHEDULE_READ_TIME = Histogram(
"engine_runtime_schedule_read_time", "time spent reading schedule", labelnames=["hostname"]
)
ENGINE_RUNTIME_SCHEDULE_WRITE_TIME = Histogram(
"engine_runtime_schedule_write_time", "time spent writing schedule", labelnames=["hostname"]
)

ENGINE_RUNTIME_STATE_READ_TIME = Histogram("engine_runtime_state_read_time", "time spent reading state")
ENGINE_RUNTIME_STATE_WRITE_TIME = Histogram("engine_runtime_state_write_time", "time spent writing state")
ENGINE_RUNTIME_STATE_READ_TIME = Histogram(
"engine_runtime_state_read_time", "time spent reading state", labelnames=["hostname"]
)
ENGINE_RUNTIME_STATE_WRITE_TIME = Histogram(
"engine_runtime_state_write_time", "time spent writing state", labelnames=["hostname"]
)

ENGINE_RUNTIME_NODE_READ_TIME = Histogram("engine_runtime_node_read_time", "time spent reading node")
ENGINE_RUNTIME_NODE_READ_TIME = Histogram(
"engine_runtime_node_read_time", "time spent reading node", labelnames=["hostname"]
)

ENGINE_RUNTIME_PROCESS_READ_TIME = Histogram("engine_runtime_process_read_time", "time spent reading process")
ENGINE_RUNTIME_PROCESS_READ_TIME = Histogram(
"engine_runtime_process_read_time", "time spent reading process", labelnames=["hostname"]
)
21 changes: 21 additions & 0 deletions bamboo_engine/utils/host.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
"""
Tencent is pleased to support the open source community by making 蓝鲸智云PaaS平台社区版 (BlueKing PaaS Community
Edition) available.
Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://opensource.org/licenses/MIT
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
"""

import socket


def get_hostname():
    """
    Return the current host name as reported by ``socket.gethostname``.
    """
    current_host = socket.gethostname()
    return current_host
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "bamboo-engine"
version = "1.5.2"
version = "1.6.0"
description = "Bamboo-engine is a general-purpose workflow engine"
authors = ["homholueng <[email protected]>"]
license = "MIT"
Expand Down
18 changes: 18 additions & 0 deletions tests/utils/test_host.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
"""
Tencent is pleased to support the open source community by making 蓝鲸智云PaaS平台社区版 (BlueKing PaaS Community
Edition) available.
Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved.
Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://opensource.org/licenses/MIT
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
"""

from bamboo_engine.utils import host


def test_get_hostname():
    """get_hostname should complete without error and return a string."""
    hostname = host.get_hostname()
    # The previous version only checked that the call did not raise; pin the
    # return type so a regression to None or bytes is caught as well.
    assert isinstance(hostname, str)

0 comments on commit d190569

Please sign in to comment.