diff --git a/bellows/config/__init__.py b/bellows/config/__init__.py index bf1cb3b3..8bcbdc74 100644 --- a/bellows/config/__init__.py +++ b/bellows/config/__init__.py @@ -22,6 +22,7 @@ CONF_USE_THREAD = "use_thread" CONF_EZSP_CONFIG = "ezsp_config" CONF_EZSP_POLICIES = "ezsp_policies" +CONF_PARAM_MAX_WATCHDOG_FAILURES = "max_watchdog_failures" CONF_FLOW_CONTROL = "flow_control" CONF_FLOW_CONTROL_DEFAULT = "software" @@ -37,6 +38,7 @@ CONFIG_SCHEMA = CONFIG_SCHEMA.extend( { vol.Required(CONF_DEVICE): SCHEMA_DEVICE, + vol.Optional(CONF_PARAM_MAX_WATCHDOG_FAILURES, default=4): int, vol.Optional(CONF_EZSP_CONFIG, default={}): dict, vol.Optional(CONF_EZSP_POLICIES, default={}): vol.Schema( {vol.Optional(str): int} diff --git a/bellows/zigbee/application.py b/bellows/zigbee/application.py index 668b05c7..96c33b0c 100644 --- a/bellows/zigbee/application.py +++ b/bellows/zigbee/application.py @@ -27,6 +27,7 @@ from bellows.config import ( CONF_EZSP_CONFIG, CONF_EZSP_POLICIES, + CONF_PARAM_MAX_WATCHDOG_FAILURES, CONF_USE_THREAD, CONFIG_SCHEMA, SCHEMA_DEVICE, @@ -921,9 +922,9 @@ def _handle_id_conflict(self, nwk: t.EmberNodeId) -> None: async def _watchdog(self): """Watchdog handler.""" LOGGER.debug("Starting EZSP watchdog") + failures = 0 read_counter = 0 await asyncio.sleep(WATCHDOG_WAKE_PERIOD) - while True: try: async with asyncio_timeout(WATCHDOG_WAKE_PERIOD * 2): @@ -954,9 +955,13 @@ async def _watchdog(self): cnt._last_reset_value = 0 LOGGER.debug("%s", counters) + + failures = 0 except (asyncio.TimeoutError, EzspError) as exc: LOGGER.warning("Watchdog heartbeat timeout: %s", repr(exc)) - break + failures += 1 + if failures > self.config[CONF_PARAM_MAX_WATCHDOG_FAILURES]: + break except asyncio.CancelledError: raise except Exception as exc: @@ -968,7 +973,7 @@ async def _watchdog(self): await asyncio.sleep(WATCHDOG_WAKE_PERIOD) self.state.counters[COUNTERS_CTRL][COUNTER_WATCHDOG].increment() - self._handle_reset_request("Watchdog timeout") + self._handle_reset_request(f"Watchdog timeout. Heartbeat timeouts: {failures}") async def _get_free_buffers(self) -> int | None: status, value = await self._ezsp.getValue( diff --git a/tests/test_application.py b/tests/test_application.py index cd671f04..e95bfa98 100644 --- a/tests/test_application.py +++ b/tests/test_application.py @@ -1134,22 +1134,31 @@ async def test_watchdog(app, monkeypatch, ezsp_version): monkeypatch.setattr(application, "WATCHDOG_WAKE_PERIOD", 0.01) monkeypatch.setattr(application, "EZSP_COUNTERS_CLEAR_IN_WATCHDOG_PERIODS", 2) + nop_success = 7 app._ezsp.ezsp_version = ezsp_version - app._ezsp.nop = AsyncMock(side_effect=EzspError()) - app._ezsp.readCounters = AsyncMock(side_effect=EzspError()) - app._ezsp.readAndClearCounters = AsyncMock(side_effect=EzspError()) + async def nop_mock(): + nonlocal nop_success + if nop_success: + nop_success -= 1 + if nop_success % 3: + raise EzspError + else: + return ([0] * 10,) + raise asyncio.TimeoutError + + app._ezsp.nop = AsyncMock(side_effect=nop_mock) + app._ezsp.readCounters = AsyncMock(side_effect=nop_mock) + app._ezsp.readAndClearCounters = AsyncMock(side_effect=nop_mock) app._handle_reset_request = MagicMock() app._ctrl_event.set() await app._watchdog() if ezsp_version == 4: - assert app._ezsp.nop.await_count == 1 - assert app._ezsp.readCounters.await_count == 0 + assert app._ezsp.nop.await_count > 4 else: - assert app._ezsp.nop.await_count == 0 - assert app._ezsp.readCounters.await_count == 1 + assert app._ezsp.readCounters.await_count >= 4 assert app._handle_reset_request.call_count == 1