From 102d8140b04947474661bc3f79877565b8125af9 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Sat, 26 Oct 2024 23:28:09 +0100 Subject: [PATCH] Improve backoff logic --- corehq/motech/repeaters/models.py | 13 ++++++++ corehq/motech/repeaters/tasks.py | 16 +++++----- .../motech/repeaters/tests/test_repeater.py | 2 +- corehq/motech/repeaters/tests/test_tasks.py | 30 ++++++++++++++----- 4 files changed, 44 insertions(+), 17 deletions(-) diff --git a/corehq/motech/repeaters/models.py b/corehq/motech/repeaters/models.py index 2019a6b40f5e4..55543a9170292 100644 --- a/corehq/motech/repeaters/models.py +++ b/corehq/motech/repeaters/models.py @@ -1243,6 +1243,19 @@ def fire(self, force_send=False, timing_context=None): self.repeater.fire_for_record(self, timing_context=timing_context) except Exception as e: self.handle_payload_error(str(e), traceback_str=traceback.format_exc()) + # Repeat records with State.Fail are retried, and repeat + # records with State.InvalidPayload are not. + # + # But a repeat record can have State.InvalidPayload + # because it was sent and rejected, so we know that the + # remote endpoint is healthy and responding, or because + # this exception occurred and it was not sent, so we + # don't know anything about the remote endpoint. + # + # Return None so that `tasks.update_repeater()` treats + # the repeat record as unsent, and does not apply or + # reset a backoff. + return None return self.state return None diff --git a/corehq/motech/repeaters/tasks.py b/corehq/motech/repeaters/tasks.py index 49c750d2a514a..d6a7fa2f51a56 100644 --- a/corehq/motech/repeaters/tasks.py +++ b/corehq/motech/repeaters/tasks.py @@ -446,18 +446,16 @@ def update_repeater(repeat_record_states, repeater_id, lock_token): results of ``_process_repeat_record()`` tasks. """ try: + if all(s in (State.Empty, None) for s in repeat_record_states): + # We can't tell anything about the remote endpoint. + return + success_or_invalid = (State.Success, State.InvalidPayload) repeater = Repeater.objects.get(id=repeater_id) - if any(s == State.Success for s in repeat_record_states): - # At least one repeat record was sent successfully. The - # remote endpoint is healthy. + if any(s in success_or_invalid for s in repeat_record_states): + # The remote endpoint appears to be healthy. repeater.reset_backoff() - elif all(s in (State.Empty, State.InvalidPayload, None) - for s in repeat_record_states): - # We can't tell anything about the remote endpoint. - # (_process_repeat_record() can return None on an exception.) - pass else: - # All sent payloads failed. Try again later. + # All the payloads that were sent failed. Try again later. repeater.set_backoff() finally: lock = get_repeater_lock(repeater_id) diff --git a/corehq/motech/repeaters/tests/test_repeater.py b/corehq/motech/repeaters/tests/test_repeater.py index 2f93dfa7b5195..a5b0cd15dfa5b 100644 --- a/corehq/motech/repeaters/tests/test_repeater.py +++ b/corehq/motech/repeaters/tests/test_repeater.py @@ -701,7 +701,7 @@ def test_payload_exception_on_fire(self): with patch.object(CaseRepeater, 'get_payload', side_effect=Exception('Boom!')): state_or_none = rr.fire() - self.assertEqual(state_or_none, State.InvalidPayload) + self.assertIsNone(state_or_none) repeat_record = RepeatRecord.objects.get(id=rr.id) self.assertEqual(repeat_record.state, State.InvalidPayload) self.assertEqual(repeat_record.failure_reason, 'Boom!') diff --git a/corehq/motech/repeaters/tests/test_tasks.py b/corehq/motech/repeaters/tests/test_tasks.py index ee0b15999665a..17eb0561ebda5 100644 --- a/corehq/motech/repeaters/tests/test_tasks.py +++ b/corehq/motech/repeaters/tests/test_tasks.py @@ -519,10 +519,23 @@ class TestUpdateRepeater(SimpleTestCase): @patch('corehq.motech.repeaters.tasks.get_repeater_lock') @patch('corehq.motech.repeaters.tasks.Repeater.objects.get') def test_update_repeater_resets_backoff_on_success(self, mock_get_repeater, __): + repeat_record_states = [State.Success, State.Fail, State.Empty, None] + mock_repeater = MagicMock() mock_get_repeater.return_value = mock_repeater + update_repeater(repeat_record_states, 1, 'token') + + mock_repeater.set_backoff.assert_not_called() + mock_repeater.reset_backoff.assert_called_once() - update_repeater([State.Success, State.Fail, State.Empty, None], 1, 'token') + @patch('corehq.motech.repeaters.tasks.get_repeater_lock') + @patch('corehq.motech.repeaters.tasks.Repeater.objects.get') + def test_update_repeater_resets_backoff_on_invalid(self, mock_get_repeater, __): + repeat_record_states = [State.InvalidPayload, State.Fail, State.Empty, None] + + mock_repeater = MagicMock() + mock_get_repeater.return_value = mock_repeater + update_repeater(repeat_record_states, 1, 'token') mock_repeater.set_backoff.assert_not_called() mock_repeater.reset_backoff.assert_called_once() @@ -530,10 +543,11 @@ def test_update_repeater_resets_backoff_on_success(self, mock_get_repeater, __): @patch('corehq.motech.repeaters.tasks.get_repeater_lock') @patch('corehq.motech.repeaters.tasks.Repeater.objects.get') def test_update_repeater_sets_backoff_on_failure(self, mock_get_repeater, __): + repeat_record_states = [State.Fail, State.Empty, None] + mock_repeater = MagicMock() mock_get_repeater.return_value = mock_repeater - - update_repeater([State.Fail, State.Empty, None], 1, 'token') + update_repeater(repeat_record_states, 1, 'token') mock_repeater.set_backoff.assert_called_once() mock_repeater.reset_backoff.assert_not_called() @@ -541,10 +555,11 @@ def test_update_repeater_sets_backoff_on_failure(self, mock_get_repeater, __): @patch('corehq.motech.repeaters.tasks.get_repeater_lock') @patch('corehq.motech.repeaters.tasks.Repeater.objects.get') def test_update_repeater_does_nothing_on_empty(self, mock_get_repeater, __): + repeat_record_states = [State.Empty] + mock_repeater = MagicMock() mock_get_repeater.return_value = mock_repeater - - update_repeater([State.Empty], 1, 'token') + update_repeater(repeat_record_states, 1, 'token') mock_repeater.set_backoff.assert_not_called() mock_repeater.reset_backoff.assert_not_called() @@ -552,10 +567,11 @@ def test_update_repeater_does_nothing_on_empty(self, mock_get_repeater, __): @patch('corehq.motech.repeaters.tasks.get_repeater_lock') @patch('corehq.motech.repeaters.tasks.Repeater.objects.get') def test_update_repeater_does_nothing_on_none(self, mock_get_repeater, __): + repeat_record_states = [None] + mock_repeater = MagicMock() mock_get_repeater.return_value = mock_repeater - - update_repeater([None], 1, 'token') + update_repeater(repeat_record_states, 1, 'token') mock_repeater.set_backoff.assert_not_called() mock_repeater.reset_backoff.assert_not_called()