From 8ccdc95722112ca63ce4dc4788091c22d9e35f06 Mon Sep 17 00:00:00 2001 From: spilkey-cisco <110940806+spilkey-cisco@users.noreply.github.com> Date: Fri, 14 Jul 2023 16:40:07 -0700 Subject: [PATCH] Use vendor customizable fan speed threshold checks (#378) --- sonic-thermalctld/scripts/thermalctld | 79 ++++++++------------- sonic-thermalctld/tests/test_thermalctld.py | 68 ++++++------------ 2 files changed, 50 insertions(+), 97 deletions(-) diff --git a/sonic-thermalctld/scripts/thermalctld b/sonic-thermalctld/scripts/thermalctld index 2265f302d..82d64a105 100644 --- a/sonic-thermalctld/scripts/thermalctld +++ b/sonic-thermalctld/scripts/thermalctld @@ -119,57 +119,35 @@ class FanStatus(logger.Logger): self.status = status return True - def _check_speed_value_available(self, speed, target_speed, tolerance, current_status): - if speed == NOT_AVAILABLE or target_speed == NOT_AVAILABLE or tolerance == NOT_AVAILABLE: - if isinstance(tolerance, int) and (tolerance > 100 or tolerance < 0): - self.log_warning('Invalid tolerance value: {}'.format(tolerance)) - return False - - if current_status is True: - self.log_warning('Fan speed or target_speed or tolerance became unavailable, ' - 'speed={}, target_speed={}, tolerance={}'.format(speed, target_speed, tolerance)) - return False - return True - - def set_under_speed(self, speed, target_speed, tolerance): + def set_under_speed(self, is_under_speed): """ Set and cache Fan under speed status - :param speed: Fan speed - :param target_speed: Fan target speed - :param tolerance: Threshold between Fan speed and target speed + :param is_under_speed: Fan under speed threshold status :return: True if status changed else False """ - if not self._check_speed_value_available(speed, target_speed, tolerance, self.under_speed): - old_status = self.under_speed - self.under_speed = False - return old_status != self.under_speed + if is_under_speed == NOT_AVAILABLE: + if self.under_speed: + self.log_warning('Fan under speed threshold check became unavailable') + is_under_speed = False - status = speed < target_speed * (1 - float(tolerance) / 100) - if status == self.under_speed: - return False + old_status = self.under_speed + self.under_speed = is_under_speed + return old_status != self.under_speed - self.under_speed = status - return True - - def set_over_speed(self, speed, target_speed, tolerance): + def set_over_speed(self, is_over_speed): """ Set and cache Fan over speed status - :param speed: Fan speed - :param target_speed: Fan target speed - :param tolerance: Threshold between Fan speed and target speed + :param is_over_speed: Fan over speed threshold status :return: True if status changed else False """ - if not self._check_speed_value_available(speed, target_speed, tolerance, self.over_speed): - old_status = self.over_speed - self.over_speed = False - return old_status != self.over_speed + if is_over_speed == NOT_AVAILABLE: + if self.over_speed: + self.log_warning('Fan over speed threshold check became unavailable') + is_over_speed = False - status = speed > target_speed * (1 + float(tolerance) / 100) - if status == self.over_speed: - return False - - self.over_speed = status - return True + old_status = self.over_speed + self.over_speed = is_over_speed + return old_status != self.over_speed def is_ok(self): """ @@ -315,16 +293,18 @@ class FanUpdater(logger.Logger): fan_status = self.fan_status_dict[fan_name] speed = NOT_AVAILABLE - speed_tolerance = NOT_AVAILABLE speed_target = NOT_AVAILABLE + is_under_speed = NOT_AVAILABLE + is_over_speed = NOT_AVAILABLE fan_fault_status = NOT_AVAILABLE fan_direction = NOT_AVAILABLE is_replaceable = try_get(fan.is_replaceable, False) presence = try_get(fan.get_presence, False) if presence: speed = try_get(fan.get_speed) - speed_tolerance = try_get(fan.get_speed_tolerance) speed_target = try_get(fan.get_target_speed) + is_under_speed = try_get(fan.is_under_speed) + is_over_speed = try_get(fan.is_over_speed) fan_fault_status = try_get(fan.get_status, False) fan_direction = try_get(fan.get_direction) @@ -344,20 +324,20 @@ class FanUpdater(logger.Logger): 'Fan fault warning: {} is broken'.format(fan_name) ) - if presence and fan_status.set_under_speed(speed, speed_target, speed_tolerance): + if presence and fan_status.set_under_speed(is_under_speed): set_led = True self._log_on_status_changed(not fan_status.under_speed, 'Fan low speed warning cleared: {} speed is back to normal'.format(fan_name), - 'Fan low speed warning: {} current speed={}, target speed={}, tolerance={}'. - format(fan_name, speed, speed_target, speed_tolerance) + 'Fan low speed warning: {} current speed={}, target speed={}'. + format(fan_name, speed, speed_target) ) - if presence and fan_status.set_over_speed(speed, speed_target, speed_tolerance): + if presence and fan_status.set_over_speed(is_over_speed): set_led = True self._log_on_status_changed(not fan_status.over_speed, 'Fan high speed warning cleared: {} speed is back to normal'.format(fan_name), - 'Fan high speed warning: {} target speed={}, current speed={}, tolerance={}'. - format(fan_name, speed_target, speed, speed_tolerance) + 'Fan high speed warning: {} current speed={}, target speed={}'. + format(fan_name, speed, speed_target) ) # We don't set PSU led here, PSU led will be handled in psud @@ -376,8 +356,9 @@ class FanUpdater(logger.Logger): ('status', str(fan_fault_status)), ('direction', str(fan_direction)), ('speed', str(speed)), - ('speed_tolerance', str(speed_tolerance)), ('speed_target', str(speed_target)), + ('is_under_speed', str(is_under_speed)), + ('is_over_speed', str(is_over_speed)), ('is_replaceable', str(is_replaceable)), ('timestamp', datetime.now().strftime('%Y%m%d %H:%M:%S')) ]) diff --git a/sonic-thermalctld/tests/test_thermalctld.py b/sonic-thermalctld/tests/test_thermalctld.py index cd25c5261..151b72fb7 100644 --- a/sonic-thermalctld/tests/test_thermalctld.py +++ b/sonic-thermalctld/tests/test_thermalctld.py @@ -68,30 +68,6 @@ class TestFanStatus(object): """ Test cases to cover functionality in FanStatus class """ - def test_check_speed_value_available(self): - fan_status = thermalctld.FanStatus() - - ret = fan_status._check_speed_value_available(30, 32, 5, True) - assert ret == True - assert fan_status.log_warning.call_count == 0 - - ret = fan_status._check_speed_value_available(thermalctld.NOT_AVAILABLE, 32, 105, True) - assert ret == False - assert fan_status.log_warning.call_count == 1 - fan_status.log_warning.assert_called_with('Invalid tolerance value: 105') - - # Reset - fan_status.log_warning.reset_mock() - - ret = fan_status._check_speed_value_available(thermalctld.NOT_AVAILABLE, 32, 5, False) - assert ret == False - assert fan_status.log_warning.call_count == 0 - - ret = fan_status._check_speed_value_available(thermalctld.NOT_AVAILABLE, 32, 5, True) - assert ret == False - assert fan_status.log_warning.call_count == 1 - fan_status.log_warning.assert_called_with('Fan speed or target_speed or tolerance became unavailable, speed=N/A, target_speed=32, tolerance=5') - def test_set_presence(self): fan_status = thermalctld.FanStatus() ret = fan_status.set_presence(True) @@ -104,52 +80,48 @@ def test_set_presence(self): def test_set_under_speed(self): fan_status = thermalctld.FanStatus() - ret = fan_status.set_under_speed(thermalctld.NOT_AVAILABLE, thermalctld.NOT_AVAILABLE, thermalctld.NOT_AVAILABLE) - assert not ret - - ret = fan_status.set_under_speed(thermalctld.NOT_AVAILABLE, thermalctld.NOT_AVAILABLE, 0) - assert not ret - ret = fan_status.set_under_speed(thermalctld.NOT_AVAILABLE, 0, 0) + ret = fan_status.set_under_speed(False) assert not ret - ret = fan_status.set_under_speed(0, 0, 0) - assert not ret - - ret = fan_status.set_under_speed(80, 100, 19) + ret = fan_status.set_under_speed(True) assert ret assert fan_status.under_speed assert not fan_status.is_ok() - ret = fan_status.set_under_speed(81, 100, 19) + ret = fan_status.set_under_speed(True) + assert not ret + + ret = fan_status.set_under_speed(False) assert ret assert not fan_status.under_speed assert fan_status.is_ok() - def test_set_over_speed(self): - fan_status = thermalctld.FanStatus() - ret = fan_status.set_over_speed(thermalctld.NOT_AVAILABLE, thermalctld.NOT_AVAILABLE, thermalctld.NOT_AVAILABLE) - assert not ret - - ret = fan_status.set_over_speed(thermalctld.NOT_AVAILABLE, thermalctld.NOT_AVAILABLE, 0) + ret = fan_status.set_under_speed(False) assert not ret - ret = fan_status.set_over_speed(thermalctld.NOT_AVAILABLE, 0, 0) - assert not ret + def test_set_over_speed(self): + fan_status = thermalctld.FanStatus() - ret = fan_status.set_over_speed(0, 0, 0) + ret = fan_status.set_over_speed(False) assert not ret - ret = fan_status.set_over_speed(120, 100, 19) + ret = fan_status.set_over_speed(True) assert ret assert fan_status.over_speed assert not fan_status.is_ok() - ret = fan_status.set_over_speed(120, 100, 21) + ret = fan_status.set_over_speed(True) + assert not ret + + ret = fan_status.set_over_speed(False) assert ret assert not fan_status.over_speed assert fan_status.is_ok() + ret = fan_status.set_over_speed(False) + assert not ret + class TestFanUpdater(object): """ @@ -251,7 +223,7 @@ def test_fan_under_speed(self): fan_list = chassis.get_all_fans() assert fan_list[0].get_status_led() == MockFan.STATUS_LED_COLOR_RED assert fan_updater.log_warning.call_count == 1 - fan_updater.log_warning.assert_called_with('Fan low speed warning: FanDrawer 0 fan 1 current speed=1, target speed=2, tolerance=0') + fan_updater.log_warning.assert_called_with('Fan low speed warning: FanDrawer 0 fan 1 current speed=1, target speed=2') fan_list[0].make_normal_speed() fan_updater.update() @@ -267,7 +239,7 @@ def test_fan_over_speed(self): fan_list = chassis.get_all_fans() assert fan_list[0].get_status_led() == MockFan.STATUS_LED_COLOR_RED assert fan_updater.log_warning.call_count == 1 - fan_updater.log_warning.assert_called_with('Fan high speed warning: FanDrawer 0 fan 1 target speed=1, current speed=2, tolerance=0') + fan_updater.log_warning.assert_called_with('Fan high speed warning: FanDrawer 0 fan 1 current speed=2, target speed=1') fan_list[0].make_normal_speed() fan_updater.update()