-
Notifications
You must be signed in to change notification settings - Fork 116
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix redis memory leak issue in PhysicalEntityCacheUpdater (#343)
- What I did Fixes the redis memory leak bug: #342 There's chance that the physical_entity_updater creates subscriptions to redis, and never consume the messages due to exceptions. Then the memory buffer(omem) of redis client starts to increase and never end, redis memory leaks. The reason is all 5 physical entity cache updaters inherit from PhysicalEntityCacheUpdater. In the first update_data, they initialize the psubscription to redis database. self.pub_sub_dict[db_index] = mibs.get_redis_pubsub(db, db.STATE_DB, self.get_key_pattern()) And everytime when the update_data is called again, it get the message from the psub and process. msg = pubsub.get_message() And outside, in the logic of the MIBUpdater, it calls update_data more frequently than reinit_data. A side-effect is, if reinit_data failed forever, the update_counter will not been cleaned, then update_data will not be called forever. self.update_counter = 0 So the problem is, at the begining, the psub is created at the first update_data and all things work well, until an unrecoverable issue happened, PHYSICAL_ENTITY_INFO|PSU * missing in the database (it's a pmon issue) This causes both reinit_data and update_data to be failed, because all of them finally call _update_per_namespace_data, which tries to cast an empty string '' to int and raises ValueError. Then the update_data is not called forever, because reinit_data will never success. But the previously established psubscription is still there, and no one gonna to consume it(the update_data is blocked), then Redis database memory starts to slowly leak. - How I did it Catch the exception during the loop of reinit_data, make sure the reinit_data of every physical_entity_updater will be called Clear message and cancel the subscription in the reinit_data, avoid the message accumulates in the redis subscription queue - How to verify it Tested on Cisco chassis, the memory is not leaking anymore.
- Loading branch information
1 parent
9d5ce53
commit 3ec1a3b
Showing
7 changed files
with
200 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -136,3 +136,4 @@ fabric.properties | |
|
||
gh-release.patch | ||
tests/test_cpuUtilizationHandler.py | ||
tests/test-results.xml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
import os | ||
import sys | ||
from unittest import TestCase | ||
|
||
import pytest | ||
from sonic_ax_impl.mibs.ietf.rfc2737 import PhysicalTableMIBUpdater | ||
|
||
|
||
if sys.version_info.major == 3: | ||
from unittest import mock | ||
else: | ||
import mock | ||
|
||
modules_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||
sys.path.insert(0, os.path.join(modules_path, 'src')) | ||
|
||
|
||
class TestPhysicalTableMIBUpdater(TestCase): | ||
|
||
# Given: 5 physical updaters are register into reinit of PhysicalTableMIBUpdater | ||
# When: The first updater(XcvrCacheUpdater) raises exception in the reinit | ||
# Then: The remaining updaters should execute reinit without any affection, | ||
# and the redis un-subscription should be called | ||
@mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.XcvrCacheUpdater.reinit_data', side_effect=Exception('mocked error')) | ||
def test_PhysicalTableMIBUpdater_exception_in_reinit_data_wont_block_reinit_iteration_first(self, mocked_xcvr_reinit_data): | ||
updater = PhysicalTableMIBUpdater() | ||
|
||
with (pytest.raises(Exception) as excinfo, | ||
mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.PsuCacheUpdater.reinit_data') as mocked_psu_reinit_data, | ||
mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.FanDrawerCacheUpdater.reinit_data') as mocked_fan_drawer_reinit_data, | ||
mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.FanCacheUpdater.reinit_data') as mocked_fan_cache_reinit_data, | ||
mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.ThermalCacheUpdater.reinit_data') as mocked_thermal_reinit_data, | ||
mock.patch('sonic_ax_impl.mibs.cancel_redis_pubsub') as mocked_cancel_redis_pubsub): | ||
updater.reinit_data() | ||
mocked_xcvr_reinit_data.assert_called() | ||
mocked_psu_reinit_data.assert_called() | ||
mocked_fan_drawer_reinit_data.assert_called() | ||
mocked_fan_cache_reinit_data.assert_called() | ||
mocked_thermal_reinit_data.assert_called() | ||
mocked_cancel_redis_pubsub.assert_called() | ||
assert str(excinfo.value) == "[Exception('mocked error')]" | ||
|
||
# Given: 5 physical updaters are register into reinit of PhysicalTableMIBUpdater | ||
# When: The last updater(ThermalCacheUpdater) raises exception in the reinit | ||
# Then: The remaining updaters should execute reinit without any affection, | ||
# and the redis un-subscription should be called | ||
@mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.ThermalCacheUpdater.reinit_data', side_effect=Exception('mocked error')) | ||
def test_PhysicalTableMIBUpdater_exception_in_reinit_data_wont_block_reinit_iteration_last(self, mocked_thermal_reinit_data): | ||
updater = PhysicalTableMIBUpdater() | ||
|
||
with (pytest.raises(Exception) as excinfo, | ||
mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.XcvrCacheUpdater.reinit_data') as mocked_xcvr_reinit_data, | ||
mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.PsuCacheUpdater.reinit_data') as mocked_psu_reinit_data, | ||
mock.patch( | ||
'sonic_ax_impl.mibs.ietf.rfc2737.FanDrawerCacheUpdater.reinit_data') as mocked_fan_drawer_reinit_data, | ||
mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.FanCacheUpdater.reinit_data') as mocked_fan_cache_reinit_data, | ||
mock.patch('sonic_ax_impl.mibs.cancel_redis_pubsub') as mocked_cancel_redis_pubsub): | ||
updater.reinit_data() | ||
mocked_xcvr_reinit_data.assert_called() | ||
mocked_psu_reinit_data.assert_called() | ||
mocked_fan_drawer_reinit_data.assert_called() | ||
mocked_fan_cache_reinit_data.assert_called() | ||
mocked_thermal_reinit_data.assert_called() | ||
mocked_cancel_redis_pubsub.assert_called() | ||
assert str(excinfo.value) == "[Exception('mocked error')]" | ||
|
||
# Given: 5 physical updaters are register into reinit of PhysicalTableMIBUpdater | ||
# When: The first updater(XcvrCacheUpdater) raises Runtime exception in the reinit | ||
# Then: The remaining updaters should execute reinit without any affection, | ||
# and the redis un-subscription should be called | ||
@mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.ThermalCacheUpdater.reinit_data', side_effect=RuntimeError('mocked runtime error')) | ||
def test_PhysicalTableMIBUpdater_runtime_exc_in_reinit_data_wont_block_reinit_iteration_first(self, mocked_thermal_reinit_data): | ||
updater = PhysicalTableMIBUpdater() | ||
|
||
with (pytest.raises(RuntimeError) as excinfo, | ||
mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.XcvrCacheUpdater.reinit_data') as mocked_xcvr_reinit_data, | ||
mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.PsuCacheUpdater.reinit_data') as mocked_psu_reinit_data, | ||
mock.patch( | ||
'sonic_ax_impl.mibs.ietf.rfc2737.FanDrawerCacheUpdater.reinit_data') as mocked_fan_drawer_reinit_data, | ||
mock.patch( | ||
'sonic_ax_impl.mibs.ietf.rfc2737.FanCacheUpdater.reinit_data') as mocked_fan_cache_reinit_data, | ||
mock.patch('sonic_ax_impl.mibs.cancel_redis_pubsub') as mocked_cancel_redis_pubsub): | ||
updater.reinit_data() | ||
mocked_thermal_reinit_data.assert_called() | ||
mocked_xcvr_reinit_data.assert_called() | ||
mocked_psu_reinit_data.assert_called() | ||
mocked_fan_drawer_reinit_data.assert_called() | ||
mocked_fan_cache_reinit_data.assert_called() | ||
mocked_cancel_redis_pubsub.assert_called() | ||
assert str(excinfo.value) == "[RuntimeError('mocked runtime error')]" | ||
|
||
# Given: 5 physical updaters are register into reinit of PhysicalTableMIBUpdater | ||
# When: The last updater(XcvrCacheUpdater) raises Runtime exception in the reinit | ||
# Then: The remaining updaters should execute reinit without any affection, | ||
# and the redis un-subscription should be called | ||
@mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.XcvrCacheUpdater.reinit_data', side_effect=RuntimeError('mocked runtime error')) | ||
def test_PhysicalTableMIBUpdater_runtime_exc_in_reinit_data_wont_block_reinit_iteration_last(self, mocked_xcvr_reinit_data): | ||
updater = PhysicalTableMIBUpdater() | ||
|
||
with (pytest.raises(RuntimeError) as exc_info, | ||
mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.PsuCacheUpdater.reinit_data') as mocked_psu_reinit_data, | ||
mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.FanDrawerCacheUpdater.reinit_data') as mocked_fan_drawer_reinit_data, | ||
mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.FanCacheUpdater.reinit_data') as mocked_fan_cache_reinit_data, | ||
mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.ThermalCacheUpdater.reinit_data') as mocked_thermal_reinit_data, | ||
mock.patch('sonic_ax_impl.mibs.cancel_redis_pubsub') as mocked_cancel_redis_pubsub): | ||
updater.reinit_data() | ||
mocked_xcvr_reinit_data.assert_called() | ||
mocked_psu_reinit_data.assert_called() | ||
mocked_fan_drawer_reinit_data.assert_called() | ||
mocked_fan_cache_reinit_data.assert_called() | ||
mocked_thermal_reinit_data.assert_called() | ||
mocked_cancel_redis_pubsub.assert_called() | ||
assert str(exc_info.value) == "[RuntimeError('mocked runtime error')]" | ||
|
||
# Given: 5 physical updaters are register into reinit of PhysicalTableMIBUpdater | ||
# When: The first(XcvrCacheUpdater) and last updater(ThermalCacheUpdater) | ||
# raises Runtime exception and Exception in the reinit | ||
# Then: The remaining updaters should execute reinit without any affection, | ||
# and the redis un-subscription should be called | ||
# Both the RuntimeError and Exception should be caught and combined as RuntimeError then been raised | ||
@mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.XcvrCacheUpdater.reinit_data', side_effect=RuntimeError('mocked runtime error')) | ||
@mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.ThermalCacheUpdater.reinit_data', side_effect=Exception('mocked error')) | ||
def test_PhysicalTableMIBUpdater_multi_exception(self, mocked_xcvr_reinit_data, mocked_thermal_reinit_data): | ||
updater = PhysicalTableMIBUpdater() | ||
|
||
with (pytest.raises(RuntimeError) as exc_info, | ||
mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.PsuCacheUpdater.reinit_data') as mocked_psu_reinit_data, | ||
mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.FanDrawerCacheUpdater.reinit_data') as mocked_fan_drawer_reinit_data, | ||
mock.patch('sonic_ax_impl.mibs.ietf.rfc2737.FanCacheUpdater.reinit_data') as mocked_fan_cache_reinit_data, | ||
mock.patch('sonic_ax_impl.mibs.cancel_redis_pubsub') as mocked_cancel_redis_pubsub): | ||
updater.reinit_data() | ||
mocked_xcvr_reinit_data.assert_called() | ||
mocked_psu_reinit_data.assert_called() | ||
mocked_fan_drawer_reinit_data.assert_called() | ||
mocked_fan_cache_reinit_data.assert_called() | ||
mocked_thermal_reinit_data.assert_called() | ||
mocked_cancel_redis_pubsub.assert_called() | ||
assert str(exc_info.value) == "[RuntimeError('mocked runtime error'), Exception('mocked error')]" |