Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[mongo] collect mongod process cpu percentage for self hosted local mongodb #18618

Merged
merged 11 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions mongo/changelog.d/18618.added
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add `mongodb.system.cpu.percent` metric to track total CPU usage of the MongoDB process on self-hosted instances (only available on self-hosted MongoDB running on the same host as the Agent).

1 change: 1 addition & 0 deletions mongo/datadog_checks/mongo/collectors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .fsynclock import FsyncLockCollector
from .host_info import HostInfoCollector
from .index_stats import IndexStatsCollector
from .process_stats import ProcessStatsCollector
from .replica import ReplicaCollector
from .replication_info import ReplicationOpLogCollector
from .server_status import ServerStatusCollector
Expand Down
103 changes: 103 additions & 0 deletions mongo/datadog_checks/mongo/collectors/process_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# (C) Datadog, Inc. 2024-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)

import psutil
from pymongo.errors import OperationFailure

from datadog_checks.mongo.collectors.base import MongoCollector
from datadog_checks.mongo.common import HostingType


class ProcessStatsCollector(MongoCollector):
    """
    Collects process stats for a mongod or mongos node.
    This collector is only compatible with self-hosted MongoDB running on the same host as the Agent.

    The psutil process handle is cached between runs because ``cpu_percent()``
    measures usage since the previous call on the same handle. The cache is
    dropped when the process disappears so the next run can re-discover it.
    """

    def __init__(self, check, tags):
        super(ProcessStatsCollector, self).__init__(check, tags)
        self._clean_server_name = check._config.clean_server_name
        # Cached psutil.Process handle; None until discovered (or after it vanished).
        self._process = None

    @property
    def is_localhost(self):
        """Whether the configured server name contains 'localhost' or '127.0.0.1'."""
        return 'localhost' in self._clean_server_name or '127.0.0.1' in self._clean_server_name

    def compatible_with(self, deployment):
        """Return True only for a self-hosted deployment reached through a local address."""
        # Can only be run on self-hosted MongoDB running on the same host as the Agent.
        self.log.debug(
            "Checking compatibility of the ProcessStatsCollector with %s, %s, %s",
            deployment.hosting_type,
            self._clean_server_name,
            self.is_localhost,
        )
        return deployment.hosting_type == HostingType.SELF_HOSTED and self.is_localhost

    def _get_pid_and_process_name(self, api):
        """Fetch PID and process name from MongoDB serverStatus.

        Returns a ``(pid, process_name)`` tuple; either element may be None when
        serverStatus does not report it, and both are None when the command fails.
        """
        try:
            server_status = api.server_status()
            pid = server_status.get("pid")
            process_name = server_status.get("process")
            if not pid or not process_name:
                self.log.warning("PID or process name not found in serverStatus.")
            return pid, process_name
        except OperationFailure as e:
            self.log.warning("Failed to retrieve serverStatus: %s", e)
            return None, None

    def _find_process_by_pid(self, pid):
        """Return the process object for a given PID, or None if not found."""
        try:
            return psutil.Process(pid)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            self.log.warning("Process with PID %s not found or access denied.", pid)
            return None

    def _find_process_by_name(self, process_name):
        """Return the first process whose name equals ``process_name``, or None if there is none."""
        if not process_name:
            self.log.warning("No process name provided.")
            return None
        for process in psutil.process_iter(["pid", "name"]):
            if process.info["name"] == process_name:
                return process
        self.log.warning("No process found with the name %s.", process_name)
        return None

    def _process_name_matches(self, process, process_name):
        """Return True when ``process`` is still readable and is named ``process_name``."""
        try:
            return process.name() == process_name
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # The process exited (or became unreadable) between lookup and the name check.
            self.log.debug("Unable to read the name of the process with PID %s.", process.pid)
            return False

    def _get_mongo_process(self, api):
        """Retrieve the MongoDB process using either PID or process name.

        The handle found is cached on the instance and reused on later calls.
        Returns None when no matching process can be found.
        """
        if self._process is not None:
            return self._process

        # Try to get the PID and process name from serverStatus
        pid, process_name = self._get_pid_and_process_name(api)

        # Attempt to get the process by PID
        process = self._find_process_by_pid(pid) if pid else None

        # A PID match is only trusted when the process name agrees with serverStatus.
        if process is not None and not self._process_name_matches(process, process_name):
            process = None

        # If process not found by PID, attempt to find it by process name
        if process is None:
            process = self._find_process_by_name(process_name)

        if not process:
            self.log.warning("Unable to retrieve MongoDB process.")

        self._process = process
        return self._process

    def collect(self, api):
        """Submit the CPU percentage of the local MongoDB process, when one is found."""
        process = self._get_mongo_process(api)
        if not process:
            return

        try:
            if (cpu_percent := process.cpu_percent()) != 0:
                # the first call of cpu_percent is 0.0 and should be ignored
                # the cpu_percent can be > 100% if the process has multiple threads
                self._submit_payload({"system": {"cpu_percent": cpu_percent}})
            else:
                self.log.warning("The MongoDB process with PID %s is not consuming CPU", process.pid)
        except psutil.NoSuchProcess:
            # The cached process is gone (e.g. mongod restarted): forget it so the
            # next run looks the process up again instead of failing forever.
            self.log.warning("The MongoDB process with PID %s no longer exists.", process.pid)
            self._process = None
        except Exception as e:
            self.log.error("Failed to collect process stats for MongoDB process with PID %s: %s", process.pid, e)
1 change: 1 addition & 0 deletions mongo/datadog_checks/mongo/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@
"system.memSizeMB": (GAUGE, "system.mem.total"), # total amount of system memory
"system.memLimitMB": (GAUGE, "system.mem.limit"), # memory usage limit
"system.numCores": (GAUGE, "system.cpu.cores"), # number of CPU cores
'system.cpu_percent': (GAUGE, "system.cpu.percent"), # total mongo process CPU usage percent
}

"""
Expand Down
2 changes: 2 additions & 0 deletions mongo/datadog_checks/mongo/mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
FsyncLockCollector,
HostInfoCollector,
IndexStatsCollector,
ProcessStatsCollector,
ReplicaCollector,
ReplicationOpLogCollector,
ServerStatusCollector,
Expand Down Expand Up @@ -134,6 +135,7 @@ def refresh_collectors(self, deployment_type, all_dbs, tags):
FsyncLockCollector(self, tags),
ServerStatusCollector(self, self._config.db_name, tags, tcmalloc=collect_tcmalloc_metrics),
HostInfoCollector(self, tags),
ProcessStatsCollector(self, tags),
]
if self._config.replica_check:
potential_collectors.append(ReplicaCollector(self, tags))
Expand Down
1 change: 1 addition & 0 deletions mongo/metadata.csv
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ mongodb.stats.totalfreestoragesize,gauge,,byte,,Total amount of free storage spa
mongodb.stats.totalsize,gauge,,byte,,Total amount of disk space allocated for both documents and indexes in all collections in the database. Includes used and free storage space.,0,mongodb,stats totalsize,,
mongodb.stats.views,gauge,,,,Contains a count of the number of views in the database.,0,mongodb,stats views,,
mongodb.system.cpu.cores,gauge,,core,,The total number of available logical processor cores.,0,mongodb,system cpu cores,,
mongodb.system.cpu.percent,gauge,,percent,,Total CPU usage percentage of the MongoDB process (only available on self-hosted MongoDB running on the same host as the Agent).,0,mongodb,process cpu percent total,,
mongodb.system.mem.limit,gauge,,megabyte,,The system memory (RAM) usage limit. For example running in a container may impose memory limits that are lower than the total system memory.,0,mongodb,system mem limit,,
mongodb.system.mem.total,gauge,,megabyte,,The total amount of system memory (RAM).,0,mongodb,system mem total,,
mongodb.tcmalloc.generic.current_allocated_bytes,gauge,,byte,,Number of bytes used by the application.,0,mongodb,,,
Expand Down
1 change: 1 addition & 0 deletions mongo/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ license = "BSD-3-Clause"
[project.optional-dependencies]
deps = [
"cachetools==5.5.0",
"psutil==5.9.6",
"pymongo[srv]==4.8.0; python_version >= '3.9'",
]

Expand Down
15 changes: 10 additions & 5 deletions mongo/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,11 +168,16 @@ def mock_local_tls_dns():
@contextmanager
def mock_pymongo(deployment):
# Context manager that yields a MockedPyMongoClient for the given deployment
# while pymongo entry points are patched.
# NOTE(review): this is a rendered diff (10 additions & 5 deletions) without +/- markers;
# the first `with` block below appears to be the removed version and the second
# `with` block the added version -- confirm against the real conftest.py.
mocked_client = MockedPyMongoClient(deployment=deployment)
# Previous version: patches MongoClient, Collection and command_cursor only.
with mock.patch('datadog_checks.mongo.api.MongoClient', mock.MagicMock(return_value=mocked_client)), mock.patch(
'pymongo.collection.Collection'
), mock.patch('pymongo.command_cursor') as cur:
cur.CommandCursor = lambda *args, **kwargs: args[1]['firstBatch']
yield mocked_client
# New version: same patches, wrapped in a patch that makes
# ProcessStatsCollector.is_localhost return False (presumably so tests using this
# helper do not run the process stats collector unless they opt in -- TODO confirm).
with mock.patch(
'datadog_checks.mongo.collectors.process_stats.ProcessStatsCollector.is_localhost',
new_callable=mock.PropertyMock,
) as mock_is_localhost:
mock_is_localhost.return_value = False
with mock.patch('datadog_checks.mongo.api.MongoClient', mock.MagicMock(return_value=mocked_client)), mock.patch(
'pymongo.collection.Collection'
), mock.patch('pymongo.command_cursor') as cur:
cur.CommandCursor = lambda *args, **kwargs: args[1]['firstBatch']
yield mocked_client


@pytest.fixture
Expand Down
10 changes: 10 additions & 0 deletions mongo/tests/results/metrics-process-stats.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[
{
"name": "mongodb.system.cpu.percent",
"type": 0,
"value": 20.0,
"tags": [
"server:mongodb://testUser2:*****@localhost:27017/test"
]
}
]
21 changes: 21 additions & 0 deletions mongo/tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json
import os

import mock
import pytest

from datadog_checks.dev.utils import get_metadata_metrics
Expand Down Expand Up @@ -1155,3 +1156,23 @@ def test_integration_database_autodiscovery(instance_integration_autodiscovery,
],
check_submission_type=True,
)


def test_integration_localhost_process_stats(instance_integration, aggregator, check, dd_run_check):
    """Run the check against a mocked standalone deployment with a mocked local process
    and verify the `process-stats` metrics."""
    mongo_check = check(instance_integration)

    # Patch objects are built up front so the `with` statement below stays flat;
    # they are entered in the same order as the original nested blocks.
    localhost_patch = mock.patch(
        'datadog_checks.mongo.collectors.process_stats.ProcessStatsCollector.is_localhost',
        new_callable=mock.PropertyMock,
    )
    process_patch = mock.patch('psutil.Process')

    with mock_pymongo("standalone"), localhost_patch as mock_is_localhost, process_patch as mock_process:
        mock_is_localhost.return_value = True
        fake_process = mock_process.return_value
        fake_process.name.return_value = 'mongos'
        fake_process.cpu_percent.return_value = 20.0
        dd_run_check(mongo_check)

    expected_categories = ['process-stats']
    assert_metrics(mongo_check, aggregator, expected_categories, ['hosting_type:self-hosted'])
Loading