From e6bdc7abdeda09f0f71e1dee71485b7b114fa933 Mon Sep 17 00:00:00 2001 From: Sergey Vasilyev Date: Thu, 1 Aug 2019 20:25:33 +0200 Subject: [PATCH 1/4] Rename handler retry/fatal errors for better English and more clarity --- docs/errors.rst | 18 +++++++++--------- docs/idempotence.rst | 4 ++-- docs/walkthrough/creation.rst | 2 +- docs/walkthrough/deletion.rst | 2 +- docs/walkthrough/updates.rst | 4 ++-- kopf/__init__.py | 11 +++++++---- kopf/reactor/handling.py | 20 ++++++++++---------- tests/handling/test_delays.py | 10 +++++----- tests/handling/test_errors.py | 18 +++++++++--------- 9 files changed, 46 insertions(+), 43 deletions(-) diff --git a/docs/errors.rst b/docs/errors.rst index 3529ca51..488d5023 100644 --- a/docs/errors.rst +++ b/docs/errors.rst @@ -14,19 +14,19 @@ and reported via the object's events. For persistence, the errors are also stored on the object's status. -Retriable errors +Temporary errors ================ -If an exception raised inherits from `kopf.HandlerRetryError`, +If an exception raised inherits from `kopf.TemporaryError`, it will postpone the current handler for the next iteration, -which can be either immediately, or after some delay:: +which can happen either immediately, or after some delay:: import kopf @kopf.on.create('zalando.org', 'v1', 'kopfexamples') def create_fn(spec, **_): if not is_data_ready(): - raise kopf.HandlerRetryError("The data is not yet ready.", delay=60) + raise kopf.TemporaryError("The data is not yet ready.", delay=60) In that case, there is no need to sleep in the handler explicitly, thus blocking any other events, causes, and generally any other handlers on the same object @@ -42,10 +42,10 @@ from being handled (such as deletion or parallel handlers/sub-handlers). The only difference is that this special case produces less logs. -Fatal errors -============ +Permanent errors +================ -If a raised exception inherits from `kopf.HandlerFatalError`, the handler +If a raised exception inherits from `kopf.PermanentError`, the handler is considered as non-retriable and non-recoverable and completely failed. Use this when the domain logic of the application means that there @@ -57,7 +57,7 @@ is no need to retry over time, as it will not become better:: def create_fn(spec, **_): valid_until = datetime.datetime.fromisoformat(spec['validUntil']) if valid_until <= datetime.datetime.utcnow(): - raise kopf.HandlerFatalError("The object is not valid anymore.") + raise kopf.PermanentError("The object is not valid anymore.") @@ -80,7 +80,7 @@ The overall runtime of the handler can be limited:: @kopf.on.create('zalando.org', 'v1', 'kopfexamples', timeout=60*60) def create_fn(spec, **_): - raise kopf.HandlerRetryError(delay=60) + raise kopf.TemporaryError(delay=60) If the handler is not succeeded within this time, it is considered as fatally failed. diff --git a/docs/idempotence.rst b/docs/idempotence.rst index 88f8bef4..075cb809 100644 --- a/docs/idempotence.rst +++ b/docs/idempotence.rst @@ -29,11 +29,11 @@ within one handling cycle. async def create_a(retry, **kwargs): if retry < 2: - raise kopf.HandlerRetryError("Not ready yet.", delay=10) + raise kopf.TemporaryError("Not ready yet.", delay=10) async def create_b(retry, **kwargs): if retry < 6: - raise kopf.HandlerRetryError("Not ready yet.", delay=10) + raise kopf.TemporaryError("Not ready yet.", delay=10) In this example, both ``create_a`` & ``create_b`` are submitted to Kopf as the sub-handlers of ``create`` on every attempt to execute it. diff --git a/docs/walkthrough/creation.rst b/docs/walkthrough/creation.rst index 80ed889e..97fbcaa9 100644 --- a/docs/walkthrough/creation.rst +++ b/docs/walkthrough/creation.rst @@ -63,7 +63,7 @@ We will use the official Kubernetes client library: name = meta.get('name') size = spec.get('size') if not size: - raise kopf.HandlerFatalError(f"Size must be set. Got {size!r}.") + raise kopf.PermanentError(f"Size must be set. Got {size!r}.") path = os.path.join(os.path.dirname(__file__), 'pvc.yaml') tmpl = open(path, 'rt').read() diff --git a/docs/walkthrough/deletion.rst b/docs/walkthrough/deletion.rst index 18cb1021..714042e8 100644 --- a/docs/walkthrough/deletion.rst +++ b/docs/walkthrough/deletion.rst @@ -45,7 +45,7 @@ Let's extend the creation handler: name = meta.get('name') size = spec.get('size') if not size: - raise kopf.HandlerFatalError(f"Size must be set. Got {size!r}.") + raise kopf.PermanentError(f"Size must be set. Got {size!r}.") path = os.path.join(os.path.dirname(__file__), 'pvc-tpl.yaml') tmpl = open(path, 'rt').read() diff --git a/docs/walkthrough/updates.rst b/docs/walkthrough/updates.rst index 1fa7aeb7..df168f9c 100644 --- a/docs/walkthrough/updates.rst +++ b/docs/walkthrough/updates.rst @@ -36,7 +36,7 @@ with one additional line: name = meta.get('name') size = spec.get('size') if not size: - raise kopf.HandlerFatalError(f"Size must be set. Got {size!r}.") + raise kopf.PermanentError(f"Size must be set. Got {size!r}.") path = os.path.join(os.path.dirname(__file__), 'pvc-tpl.yaml') tmpl = open(path, 'rt').read() @@ -76,7 +76,7 @@ and patches the PVC with the new size from the EVC:: size = spec.get('create_fn', {}).get('size', None) if not size: - raise kopf.HandlerFatalError(f"Size must be set. Got {size!r}.") + raise kopf.PermanentError(f"Size must be set. Got {size!r}.") pvc_name = status['pvc-name'] pvc_patch = {'spec': {'resources': {'requests': {'storage': size}}}} diff --git a/kopf/__init__.py b/kopf/__init__.py index 17997f9a..4eb5dcf8 100644 --- a/kopf/__init__.py +++ b/kopf/__init__.py @@ -35,8 +35,8 @@ lifecycles, # as a separate name on the public namespace ) from kopf.reactor.handling import ( - HandlerRetryError, - HandlerFatalError, + TemporaryError, + PermanentError, HandlerTimeoutError, execute, ) @@ -66,6 +66,9 @@ remove_owner_reference, ) +HandlerFatalError = PermanentError # a backward-compatibility alias +HandlerRetryError = TemporaryError # a backward-compatibility alias + __all__ = [ 'on', 'lifecycles', 'register', 'execute', 'configure', @@ -76,8 +79,8 @@ 'get_default_lifecycle', 'set_default_lifecycle', 'build_object_reference', 'build_owner_reference', 'append_owner_reference', 'remove_owner_reference', - 'HandlerRetryError', - 'HandlerFatalError', + 'PermanentError', 'HandlerFatalError', + 'TemporaryError', 'HandlerRetryError', 'HandlerTimeoutError', 'BaseRegistry', 'SimpleRegistry', diff --git a/kopf/reactor/handling.py b/kopf/reactor/handling.py index 7863956a..4f1d2c4b 100644 --- a/kopf/reactor/handling.py +++ b/kopf/reactor/handling.py @@ -39,22 +39,22 @@ """ The default delay duration for the regular exception in retry-mode. """ -class HandlerFatalError(Exception): - """ A fatal handler error, the reties are useless. """ +class PermanentError(Exception): + """ A fatal handler error, the retries are useless. """ -class HandlerRetryError(Exception): +class TemporaryError(Exception): """ A potentially recoverable error, should be retried. """ def __init__(self, *args, delay=DEFAULT_RETRY_DELAY, **kwargs): super().__init__(*args, **kwargs) self.delay = delay -class HandlerTimeoutError(HandlerFatalError): +class HandlerTimeoutError(PermanentError): """ An error for the handler's timeout (if set). """ -class HandlerChildrenRetry(HandlerRetryError): +class HandlerChildrenRetry(TemporaryError): """ An internal pseudo-error to retry for the next sub-handlers attempt. """ @@ -393,19 +393,19 @@ async def _execute( status.set_retry_time(body=cause.body, patch=cause.patch, handler=handler, delay=e.delay) handlers_left.append(handler) - # Definitely retriable error, no matter what is the error-reaction mode. - except HandlerRetryError as e: + # Definitely a temporary error, regardless of the error strictness. + except TemporaryError as e: logger.exception(f"Handler {handler.id!r} failed with a retry exception. Will retry.") status.set_retry_time(body=cause.body, patch=cause.patch, handler=handler, delay=e.delay) handlers_left.append(handler) - # Definitely fatal error, no matter what is the error-reaction mode. - except HandlerFatalError as e: + # Definitely a permanent error, regardless of the error strictness. + except PermanentError as e: logger.exception(f"Handler {handler.id!r} failed with a fatal exception. Will stop.") status.store_failure(body=cause.body, patch=cause.patch, handler=handler, exc=e) # TODO: report the handling failure somehow (beside logs/events). persistent status? - # Regular errors behave as either retriable or fatal depending on the error-reaction mode. + # Regular errors behave as either temporary or permanent depending on the error strictness. except Exception as e: if retry_on_errors: logger.exception(f"Handler {handler.id!r} failed with an exception. Will retry.") diff --git a/tests/handling/test_delays.py b/tests/handling/test_delays.py index f427cefa..0263f6b7 100644 --- a/tests/handling/test_delays.py +++ b/tests/handling/test_delays.py @@ -6,7 +6,7 @@ import kopf from kopf.reactor.causation import HANDLER_CAUSES, CREATE, UPDATE, DELETE, RESUME -from kopf.reactor.handling import HandlerRetryError +from kopf.reactor.handling import TemporaryError from kopf.reactor.handling import WAITING_KEEPALIVE_INTERVAL from kopf.reactor.handling import custom_object_handler from kopf.structs.finalizers import FINALIZER @@ -21,10 +21,10 @@ async def test_delayed_handlers_progress( caplog, assert_logs, k8s_mocked, now, ts, delay): caplog.set_level(logging.DEBUG) - handlers.create_mock.side_effect = HandlerRetryError("oops", delay=delay) - handlers.update_mock.side_effect = HandlerRetryError("oops", delay=delay) - handlers.delete_mock.side_effect = HandlerRetryError("oops", delay=delay) - handlers.resume_mock.side_effect = HandlerRetryError("oops", delay=delay) + handlers.create_mock.side_effect = TemporaryError("oops", delay=delay) + handlers.update_mock.side_effect = TemporaryError("oops", delay=delay) + handlers.delete_mock.side_effect = TemporaryError("oops", delay=delay) + handlers.resume_mock.side_effect = TemporaryError("oops", delay=delay) cause_mock.event = cause_type diff --git a/tests/handling/test_errors.py b/tests/handling/test_errors.py index 3b35511e..b5498d22 100644 --- a/tests/handling/test_errors.py +++ b/tests/handling/test_errors.py @@ -5,7 +5,7 @@ import kopf from kopf.reactor.causation import HANDLER_CAUSES, CREATE, UPDATE, DELETE, RESUME -from kopf.reactor.handling import HandlerFatalError, HandlerRetryError +from kopf.reactor.handling import PermanentError, TemporaryError from kopf.reactor.handling import custom_object_handler @@ -18,10 +18,10 @@ async def test_fatal_error_stops_handler( name1 = f'{cause_type}_fn' cause_mock.event = cause_type - handlers.create_mock.side_effect = HandlerFatalError("oops") - handlers.update_mock.side_effect = HandlerFatalError("oops") - handlers.delete_mock.side_effect = HandlerFatalError("oops") - handlers.resume_mock.side_effect = HandlerFatalError("oops") + handlers.create_mock.side_effect = PermanentError("oops") + handlers.update_mock.side_effect = PermanentError("oops") + handlers.delete_mock.side_effect = PermanentError("oops") + handlers.resume_mock.side_effect = PermanentError("oops") await custom_object_handler( lifecycle=kopf.lifecycles.one_by_one, @@ -59,10 +59,10 @@ async def test_retry_error_delays_handler( name1 = f'{cause_type}_fn' cause_mock.event = cause_type - handlers.create_mock.side_effect = HandlerRetryError("oops") - handlers.update_mock.side_effect = HandlerRetryError("oops") - handlers.delete_mock.side_effect = HandlerRetryError("oops") - handlers.resume_mock.side_effect = HandlerRetryError("oops") + handlers.create_mock.side_effect = TemporaryError("oops") + handlers.update_mock.side_effect = TemporaryError("oops") + handlers.delete_mock.side_effect = TemporaryError("oops") + handlers.resume_mock.side_effect = TemporaryError("oops") await custom_object_handler( lifecycle=kopf.lifecycles.one_by_one, From 70f76b30691ef8d1d7df0a6982967ef946e16bdc Mon Sep 17 00:00:00 2001 From: Sergey Vasilyev Date: Thu, 1 Aug 2019 20:26:04 +0200 Subject: [PATCH 2/4] Demonstrate temporary and permanent errors in at least one example --- examples/03-exceptions/example.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/03-exceptions/example.py b/examples/03-exceptions/example.py index abc0ae1c..66736949 100644 --- a/examples/03-exceptions/example.py +++ b/examples/03-exceptions/example.py @@ -11,8 +11,8 @@ class MyException(Exception): def create_fn(retry, **kwargs): time.sleep(2) # for different timestamps of the events if not retry: - raise Exception("First failure.") + raise kopf.TemporaryError("First failure.", delay=10) elif retry == 1: raise MyException("Second failure.") else: - pass + raise kopf.PermanentError("Third failure, the final one.") From 256f977fc19d10f75d365bc3ada9d59a8cbdeb7d Mon Sep 17 00:00:00 2001 From: Sergey Vasilyev Date: Thu, 1 Aug 2019 20:27:16 +0200 Subject: [PATCH 3/4] Log framework-provided exceptions briefly, with no stacktraces --- kopf/reactor/handling.py | 4 ++-- tests/handling/test_delays.py | 2 +- tests/handling/test_errors.py | 4 ++-- tests/handling/test_timeouts.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/kopf/reactor/handling.py b/kopf/reactor/handling.py index 4f1d2c4b..977e8e4f 100644 --- a/kopf/reactor/handling.py +++ b/kopf/reactor/handling.py @@ -395,13 +395,13 @@ async def _execute( # Definitely a temporary error, regardless of the error strictness. except TemporaryError as e: - logger.exception(f"Handler {handler.id!r} failed with a retry exception. Will retry.") + logger.error(f"Handler {handler.id!r} failed temporarily: %s", str(e) or repr(e)) status.set_retry_time(body=cause.body, patch=cause.patch, handler=handler, delay=e.delay) handlers_left.append(handler) # Definitely a permanent error, regardless of the error strictness. except PermanentError as e: - logger.exception(f"Handler {handler.id!r} failed with a fatal exception. Will stop.") + logger.error(f"Handler {handler.id!r} failed permanently: %s", str(e) or repr(e)) status.store_failure(body=cause.body, patch=cause.patch, handler=handler, exc=e) # TODO: report the handling failure somehow (beside logs/events). persistent status? diff --git a/tests/handling/test_delays.py b/tests/handling/test_delays.py index 0263f6b7..25899ea5 100644 --- a/tests/handling/test_delays.py +++ b/tests/handling/test_delays.py @@ -52,7 +52,7 @@ async def test_delayed_handlers_progress( assert_logs([ "Invoking handler .+", - "Handler .+ failed with a retry exception. Will retry.", + "Handler .+ failed temporarily: oops", ]) diff --git a/tests/handling/test_errors.py b/tests/handling/test_errors.py index b5498d22..1749ab2d 100644 --- a/tests/handling/test_errors.py +++ b/tests/handling/test_errors.py @@ -46,7 +46,7 @@ async def test_fatal_error_stops_handler( assert patch['status']['kopf']['progress'][name1]['message'] == 'oops' assert_logs([ - "Handler .+ failed with a fatal exception. Will stop.", + "Handler .+ failed permanently: oops", ]) @@ -88,7 +88,7 @@ async def test_retry_error_delays_handler( assert 'delayed' in patch['status']['kopf']['progress'][name1] assert_logs([ - "Handler .+ failed with a retry exception. Will retry.", + "Handler .+ failed temporarily: oops", ]) diff --git a/tests/handling/test_timeouts.py b/tests/handling/test_timeouts.py index ae07a190..366e4ce5 100644 --- a/tests/handling/test_timeouts.py +++ b/tests/handling/test_timeouts.py @@ -55,5 +55,5 @@ async def test_timed_out_handler_fails( assert patch['status']['kopf']['progress'][name1]['failure'] is True assert_logs([ - "Handler .+ failed with a fatal exception. Will stop.", + "Handler .+ failed permanently: Handler .+ has timed out after", ]) From dbfe0baa273bccd6b18f52388c2ad4184cb995d5 Mon Sep 17 00:00:00 2001 From: Sergey Vasilyev Date: Thu, 1 Aug 2019 21:25:30 +0200 Subject: [PATCH 4/4] Log internal exceptions specially (and briefly), without generalisation --- kopf/reactor/handling.py | 6 ++++++ tests/handling/test_timeouts.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/kopf/reactor/handling.py b/kopf/reactor/handling.py index 977e8e4f..14def9d0 100644 --- a/kopf/reactor/handling.py +++ b/kopf/reactor/handling.py @@ -399,6 +399,12 @@ async def _execute( status.set_retry_time(body=cause.body, patch=cause.patch, handler=handler, delay=e.delay) handlers_left.append(handler) + # Same as permanent errors below, but with better logging for our internal cases. + except HandlerTimeoutError as e: + logger.error(f"%s", str(e) or repr(e)) # already formatted + status.store_failure(body=cause.body, patch=cause.patch, handler=handler, exc=e) + # TODO: report the handling failure somehow (beside logs/events). persistent status? + # Definitely a permanent error, regardless of the error strictness. except PermanentError as e: logger.error(f"Handler {handler.id!r} failed permanently: %s", str(e) or repr(e)) diff --git a/tests/handling/test_timeouts.py b/tests/handling/test_timeouts.py index 366e4ce5..def96b7c 100644 --- a/tests/handling/test_timeouts.py +++ b/tests/handling/test_timeouts.py @@ -55,5 +55,5 @@ async def test_timed_out_handler_fails( assert patch['status']['kopf']['progress'][name1]['failure'] is True assert_logs([ - "Handler .+ failed permanently: Handler .+ has timed out after", + "Handler .+ has timed out after", ])