From 8a6f31342a65264fda6491c41d0046d7ab1b8af3 Mon Sep 17 00:00:00 2001 From: Jack-Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Thu, 17 Nov 2022 14:35:11 +0000 Subject: [PATCH 01/11] Add a Snowplow interface (close #295) PR #301 * Add snowplow.py interface * Add emitter configuration class * Add tracker configuration class * Add get_namespace() to tracker --- snowplow_tracker/__init__.py | 3 + snowplow_tracker/contracts.py | 21 +- snowplow_tracker/emitter_configuration.py | 129 +++ snowplow_tracker/emitters.py | 287 ++++--- snowplow_tracker/payload.py | 50 +- snowplow_tracker/self_describing_json.py | 6 +- snowplow_tracker/snowplow.py | 161 ++++ snowplow_tracker/subject.py | 133 +-- snowplow_tracker/tracker.py | 949 ++++++++++++---------- snowplow_tracker/tracker_configuration.py | 66 ++ snowplow_tracker/typing.py | 27 +- 11 files changed, 1146 insertions(+), 686 deletions(-) create mode 100644 snowplow_tracker/emitter_configuration.py create mode 100644 snowplow_tracker/snowplow.py create mode 100644 snowplow_tracker/tracker_configuration.py diff --git a/snowplow_tracker/__init__.py b/snowplow_tracker/__init__.py index 3d618f9f..da683ada 100644 --- a/snowplow_tracker/__init__.py +++ b/snowplow_tracker/__init__.py @@ -3,6 +3,9 @@ from snowplow_tracker.emitters import logger, Emitter, AsyncEmitter from snowplow_tracker.self_describing_json import SelfDescribingJson from snowplow_tracker.tracker import Tracker +from snowplow_tracker.emitter_configuration import EmitterConfiguration +from snowplow_tracker.tracker_configuration import TrackerConfiguration +from snowplow_tracker.snowplow import Snowplow from snowplow_tracker.contracts import disable_contracts, enable_contracts # celery extra diff --git a/snowplow_tracker/contracts.py b/snowplow_tracker/contracts.py index 2db8b449..7a9253af 100644 --- a/snowplow_tracker/contracts.py +++ b/snowplow_tracker/contracts.py @@ -45,7 +45,9 @@ def contracts_enabled() -> bool: def greater_than(value: float, compared_to: float) -> None: if contracts_enabled() and value <= compared_to: - raise ValueError("{0} must be greater than {1}.".format(_get_parameter_name(), compared_to)) + raise ValueError( + "{0} must be greater than {1}.".format(_get_parameter_name(), compared_to) + ) def non_empty(seq: Sized) -> None: @@ -78,21 +80,26 @@ def _get_parameter_name() -> str: match = _MATCH_FIRST_PARAMETER_REGEX.search(code) if not match: - return 'Unnamed parameter' + return "Unnamed parameter" return match.groups(0)[0] def _check_form_element(element: Dict[str, Any]) -> bool: """ - Helper method to check that dictionary conforms element - in sumbit_form and change_form schemas + Helper method to check that dictionary conforms element + in sumbit_form and change_form schemas """ - all_present = isinstance(element, dict) and 'name' in element and 'value' in element and 'nodeName' in element + all_present = ( + isinstance(element, dict) + and "name" in element + and "value" in element + and "nodeName" in element + ) try: - if element['type'] in FORM_TYPES: + if element["type"] in FORM_TYPES: type_valid = True else: type_valid = False except KeyError: type_valid = True - return all_present and element['nodeName'] in FORM_NODE_NAMES and type_valid + return all_present and element["nodeName"] in FORM_NODE_NAMES and type_valid diff --git a/snowplow_tracker/emitter_configuration.py b/snowplow_tracker/emitter_configuration.py new file mode 100644 index 00000000..6d4ef0e6 --- /dev/null +++ b/snowplow_tracker/emitter_configuration.py @@ -0,0 +1,129 @@ +# """ +# 
emitter_configuration.py + +# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. + +# Authors: Jack Keene, Anuj More, Alex Dean, Fred Blundun, Paul Boocock +# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd +# License: Apache License Version 2.0 +# """ + +from typing import Optional, Union, Tuple +from snowplow_tracker.typing import SuccessCallback, FailureCallback + + +class EmitterConfiguration(object): + def __init__( + self, + buffer_size: Optional[int] = None, + on_success: Optional[SuccessCallback] = None, + on_failure: Optional[FailureCallback] = None, + byte_limit: Optional[int] = None, + request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + ) -> None: + """ + Configuration for the emitter that sends events to the Snowplow collector. + :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :type buffer_size: int | None + :param on_success: Callback executed after every HTTP request in a flush has status code 200 + Gets passed the number of events flushed. + :type on_success: function | None + :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) If method is "post": The unsent data in string form; + If method is "get": An array of dictionaries corresponding to the unsent events' payloads + :type on_failure: function | None + :param byte_limit: The size event list after reaching which queued events will be flushed + :type byte_limit: int | None + :param request_timeout: Timeout for the HTTP requests. Can be set either as single float value which + applies to both "connect" AND "read" timeout, or as tuple with two float values + which specify the "connect" and "read" timeouts separately + :type request_timeout: float | tuple | None + """ + + self.buffer_size = buffer_size + self.on_success = on_success + self.on_failure = on_failure + self.byte_limit = byte_limit + self.request_timeout = request_timeout + + @property + def buffer_size(self) -> Optional[int]: + """ + The maximum number of queued events before the buffer is flushed. Default is 10. + """ + return self._buffer_size + + @buffer_size.setter + def buffer_size(self, value: Optional[int]): + if isinstance(value, int) and value < 0: + raise ValueError("buffer_size must greater than 0") + if not isinstance(value, int) and value is not None: + raise ValueError("buffer_size must be of type int") + self._buffer_size = value + + @property + def on_success(self) -> Optional[SuccessCallback]: + """ + Callback executed after every HTTP request in a flush has status code 200. Gets passed the number of events flushed. 
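A minimal usage sketch for this configuration class, with placeholder buffer, byte-limit and timeout values and illustrative callback bodies:

from snowplow_tracker import EmitterConfiguration

def flush_succeeded(sent_count):
    # Invoked after every request in a flush came back with status code 200.
    print("Sent %d events" % sent_count)

def flush_failed(sent_count, unsent):
    # For "post" the unsent data arrives as a string; for "get" it is a list
    # of unsent event payload dictionaries.
    print("Sent %d events before a request failed" % sent_count)

emitter_config = EmitterConfiguration(
    buffer_size=25,               # flush once 25 events are queued
    on_success=flush_succeeded,
    on_failure=flush_failed,
    byte_limit=50000,             # also flush once ~50 kB of events are queued
    request_timeout=(2.5, 10.0),  # (connect, read) timeouts in seconds
)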
+ """ + return self._on_success + + @on_success.setter + def on_success(self, value: Optional[SuccessCallback]): + self._on_success = value + + @property + def on_failure(self) -> Optional[FailureCallback]: + """ + Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) If method is "post": The unsent data in string form; + If method is "get": An array of dictionaries corresponding to the unsent events' payloads + """ + return self._on_failure + + @on_failure.setter + def on_failure(self, value: Optional[FailureCallback]): + self._on_failure = value + + @property + def byte_limit(self) -> Optional[int]: + """ + The size event list after reaching which queued events will be flushed + """ + return self._byte_limit + + @byte_limit.setter + def byte_limit(self, value: Optional[int]): + if isinstance(value, int) and value < 0: + raise ValueError("byte_limit must greater than 0") + if not isinstance(value, int) and value is not None: + raise ValueError("byte_limit must be of type int") + self._byte_limit = value + + @property + def request_timeout(self) -> Optional[Union[float, Tuple[float, float]]]: + """ + Timeout for the HTTP requests. Can be set either as single float value which + applies to both "connect" AND "read" timeout, or as tuple with two float values + which specify the "connect" and "read" timeouts separately + """ + return self._request_timeout + + @request_timeout.setter + def request_timeout(self, value: Optional[Union[float, Tuple[float, float]]]): + self._request_timeout = value diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index d2549d25..5ddb67c8 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -28,7 +28,14 @@ from queue import Queue from snowplow_tracker.self_describing_json import SelfDescribingJson -from snowplow_tracker.typing import PayloadDict, PayloadDictList, HttpProtocol, Method, SuccessCallback, FailureCallback +from snowplow_tracker.typing import ( + PayloadDict, + PayloadDictList, + HttpProtocol, + Method, + SuccessCallback, + FailureCallback, +) from snowplow_tracker.contracts import one_of # logging @@ -37,54 +44,57 @@ logger.setLevel(logging.INFO) DEFAULT_MAX_LENGTH = 10 -PAYLOAD_DATA_SCHEMA = "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4" +PAYLOAD_DATA_SCHEMA = ( + "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4" +) PROTOCOLS = {"http", "https"} METHODS = {"get", "post"} class Emitter(object): """ - Synchronously send Snowplow events to a Snowplow collector - Supports both GET and POST requests + Synchronously send Snowplow events to a Snowplow collector + Supports both GET and POST requests """ def __init__( - self, - endpoint: str, - protocol: HttpProtocol = "https", - port: Optional[int] = None, - method: Method = "post", - buffer_size: Optional[int] = None, - on_success: Optional[SuccessCallback] = None, - on_failure: Optional[FailureCallback] = None, - byte_limit: Optional[int] = None, - request_timeout: Optional[Union[float, Tuple[float, float]]] = None) -> None: + self, + endpoint: str, + protocol: HttpProtocol = "https", + port: Optional[int] = None, + method: Method = "post", + buffer_size: Optional[int] = None, + on_success: Optional[SuccessCallback] = None, + on_failure: Optional[FailureCallback] = None, + byte_limit: Optional[int] = None, + request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + ) -> None: """ - 
:param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. - :type endpoint: string - :param protocol: The protocol to use - http or https. Defaults to https. - :type protocol: protocol - :param port: The collector port to connect to - :type port: int | None - :param method: The HTTP request method. Defaults to post. - :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. - :type buffer_size: int | None - :param on_success: Callback executed after every HTTP request in a flush has status code 200 - Gets passed the number of events flushed. - :type on_success: function | None - :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 - Gets passed two arguments: - 1) The number of events which were successfully sent - 2) If method is "post": The unsent data in string form; - If method is "get": An array of dictionaries corresponding to the unsent events' payloads - :type on_failure: function | None - :param byte_limit: The size event list after reaching which queued events will be flushed - :type byte_limit: int | None - :param request_timeout: Timeout for the HTTP requests. Can be set either as single float value which - applies to both "connect" AND "read" timeout, or as tuple with two float values - which specify the "connect" and "read" timeouts separately - :type request_timeout: float | tuple | None + :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. + :type endpoint: string + :param protocol: The protocol to use - http or https. Defaults to https. + :type protocol: protocol + :param port: The collector port to connect to + :type port: int | None + :param method: The HTTP request method. Defaults to post. + :type method: method + :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :type buffer_size: int | None + :param on_success: Callback executed after every HTTP request in a flush has status code 200 + Gets passed the number of events flushed. + :type on_success: function | None + :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) If method is "post": The unsent data in string form; + If method is "get": An array of dictionaries corresponding to the unsent events' payloads + :type on_failure: function | None + :param byte_limit: The size event list after reaching which queued events will be flushed + :type byte_limit: int | None + :param request_timeout: Timeout for the HTTP requests. 
Can be set either as single float value which + applies to both "connect" AND "read" timeout, or as tuple with two float values + which specify the "connect" and "read" timeouts separately + :type request_timeout: float | tuple | None """ one_of(protocol, PROTOCOLS) one_of(method, METHODS) @@ -115,20 +125,21 @@ def __init__( @staticmethod def as_collector_uri( - endpoint: str, - protocol: HttpProtocol = "https", - port: Optional[int] = None, - method: Method = "post") -> str: + endpoint: str, + protocol: HttpProtocol = "https", + port: Optional[int] = None, + method: Method = "post", + ) -> str: """ - :param endpoint: The raw endpoint provided by the user - :type endpoint: string - :param protocol: The protocol to use - http or https - :type protocol: protocol - :param port: The collector port to connect to - :type port: int | None - :param method: Either `get` or `post` HTTP method - :type method: method - :rtype: string + :param endpoint: The raw endpoint provided by the user + :type endpoint: string + :param protocol: The protocol to use - http or https + :type protocol: protocol + :param port: The collector port to connect to + :type port: int | None + :param method: Either `get` or `post` HTTP method + :type method: method + :rtype: string """ if len(endpoint) < 1: raise ValueError("No endpoint provided.") @@ -149,11 +160,11 @@ def as_collector_uri( def input(self, payload: PayloadDict) -> None: """ - Adds an event to the buffer. - If the maximum size has been reached, flushes the buffer. + Adds an event to the buffer. + If the maximum size has been reached, flushes the buffer. - :param payload: The name-value pairs for the event - :type payload: dict(string:\\*) + :param payload: The name-value pairs for the event + :type payload: dict(string:\\*) """ with self.lock: if self.bytes_queued is not None: @@ -169,18 +180,20 @@ def input(self, payload: PayloadDict) -> None: def reached_limit(self) -> bool: """ - Checks if event-size or bytes limit are reached + Checks if event-size or bytes limit are reached - :rtype: bool + :rtype: bool """ if self.byte_limit is None: return len(self.buffer) >= self.buffer_size else: - return (self.bytes_queued or 0) >= self.byte_limit or len(self.buffer) >= self.buffer_size + return (self.bytes_queued or 0) >= self.byte_limit or len( + self.buffer + ) >= self.buffer_size def flush(self) -> None: """ - Sends all events in the buffer to the collector. + Sends all events in the buffer to the collector. """ with self.lock: self.send_events(self.buffer) @@ -190,8 +203,8 @@ def flush(self) -> None: def http_post(self, data: str) -> bool: """ - :param data: The array of JSONs to be sent - :type data: string + :param data: The array of JSONs to be sent + :type data: string """ logger.info("Sending POST request to %s..." 
% self.endpoint) logger.debug("Payload: %s" % data) @@ -200,10 +213,13 @@ def http_post(self, data: str) -> bool: r = requests.post( self.endpoint, data=data, - headers={'Content-Type': 'application/json; charset=utf-8'}, - timeout=self.request_timeout) + headers={"Content-Type": "application/json; charset=utf-8"}, + timeout=self.request_timeout, + ) post_succeeded = Emitter.is_good_status_code(r.status_code) - getattr(logger, "info" if post_succeeded else "warning")("POST request finished with status code: " + str(r.status_code)) + getattr(logger, "info" if post_succeeded else "warning")( + "POST request finished with status code: " + str(r.status_code) + ) except requests.RequestException as e: logger.warning(e) @@ -211,16 +227,20 @@ def http_post(self, data: str) -> bool: def http_get(self, payload: PayloadDict) -> bool: """ - :param payload: The event properties - :type payload: dict(string:\\*) + :param payload: The event properties + :type payload: dict(string:\\*) """ logger.info("Sending GET request to %s..." % self.endpoint) logger.debug("Payload: %s" % payload) get_succeeded = False try: - r = requests.get(self.endpoint, params=payload, timeout=self.request_timeout) + r = requests.get( + self.endpoint, params=payload, timeout=self.request_timeout + ) get_succeeded = Emitter.is_good_status_code(r.status_code) - getattr(logger, "info" if get_succeeded else "warning")("GET request finished with status code: " + str(r.status_code)) + getattr(logger, "info" if get_succeeded else "warning")( + "GET request finished with status code: " + str(r.status_code) + ) except requests.RequestException as e: logger.warning(e) @@ -228,8 +248,8 @@ def http_get(self, payload: PayloadDict) -> bool: def sync_flush(self) -> None: """ - Calls the flush method of the base Emitter class. - This is guaranteed to be blocking, not asynchronous. + Calls the flush method of the base Emitter class. + This is guaranteed to be blocking, not asynchronous. 
""" logger.debug("Starting synchronous flush...") Emitter.flush(self) @@ -238,16 +258,16 @@ def sync_flush(self) -> None: @staticmethod def is_good_status_code(status_code: int) -> bool: """ - :param status_code: HTTP status code - :type status_code: int - :rtype: bool + :param status_code: HTTP status code + :type status_code: int + :rtype: bool """ return 200 <= status_code < 400 def send_events(self, evts: PayloadDictList) -> None: """ - :param evts: Array of events to be sent - :type evts: list(dict(string:\\*)) + :param evts: Array of events to be sent + :type evts: list(dict(string:\\*)) """ if len(evts) > 0: logger.info("Attempting to send %s events" % len(evts)) @@ -256,7 +276,7 @@ def send_events(self, evts: PayloadDictList) -> None: success_events = [] failure_events = [] - if self.method == 'post': + if self.method == "post": data = SelfDescribingJson(PAYLOAD_DATA_SCHEMA, evts).to_string() request_succeeded = self.http_post(data) if request_succeeded: @@ -264,7 +284,7 @@ def send_events(self, evts: PayloadDictList) -> None: else: failure_events += evts - elif self.method == 'get': + elif self.method == "get": for evt in evts: request_succeeded = self.http_get(evt) if request_succeeded: @@ -282,12 +302,12 @@ def send_events(self, evts: PayloadDictList) -> None: def set_flush_timer(self, timeout: float, flush_now: bool = False) -> None: """ - Set an interval at which the buffer will be flushed + Set an interval at which the buffer will be flushed - :param timeout: interval in seconds - :type timeout: int | float - :param flush_now: immediately flush buffer - :type flush_now: bool + :param timeout: interval in seconds + :type timeout: int | float + :param flush_now: immediately flush buffer + :type flush_now: bool """ # Repeatable create new timer @@ -299,7 +319,7 @@ def set_flush_timer(self, timeout: float, flush_now: bool = False) -> None: def cancel_flush_timer(self) -> None: """ - Abort automatic async flushing + Abort automatic async flushing """ if self.timer is not None: @@ -307,16 +327,17 @@ def cancel_flush_timer(self) -> None: @staticmethod def attach_sent_timestamp(events: PayloadDictList) -> None: - """ - Attach (by mutating in-place) current timestamp in milliseconds - as `stm` param + """ + Attach (by mutating in-place) current timestamp in milliseconds + as `stm` param - :param events: Array of events to be sent - :type events: list(dict(string:\\*)) - :rtype: None + :param events: Array of events to be sent + :type events: list(dict(string:\\*)) + :rtype: None """ + def update(e: PayloadDict) -> None: - e.update({'stm': str(int(time.time()) * 1000)}) + e.update({"stm": str(int(time.time()) * 1000)}) for event in events: update(event) @@ -324,46 +345,56 @@ def update(e: PayloadDict) -> None: class AsyncEmitter(Emitter): """ - Uses threads to send HTTP requests asynchronously + Uses threads to send HTTP requests asynchronously """ def __init__( - self, - endpoint: str, - protocol: HttpProtocol = "http", - port: Optional[int] = None, - method: Method = "post", - buffer_size: Optional[int] = None, - on_success: Optional[SuccessCallback] = None, - on_failure: Optional[FailureCallback] = None, - thread_count: int = 1, - byte_limit: Optional[int] = None) -> None: + self, + endpoint: str, + protocol: HttpProtocol = "http", + port: Optional[int] = None, + method: Method = "post", + buffer_size: Optional[int] = None, + on_success: Optional[SuccessCallback] = None, + on_failure: Optional[FailureCallback] = None, + thread_count: int = 1, + byte_limit: Optional[int] = None, + ) 
-> None: """ - :param endpoint: The collector URL. Don't include "http://" - this is done automatically. - :type endpoint: string - :param protocol: The protocol to use - http or https. Defaults to http. - :type protocol: protocol - :param port: The collector port to connect to - :type port: int | None - :param method: The HTTP request method - :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. - :type buffer_size: int | None - :param on_success: Callback executed after every HTTP request in a flush has status code 200 - Gets passed the number of events flushed. - :type on_success: function | None - :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 - Gets passed two arguments: - 1) The number of events which were successfully sent - 2) If method is "post": The unsent data in string form; - If method is "get": An array of dictionaries corresponding to the unsent events' payloads - :type on_failure: function | None - :param thread_count: Number of worker threads to use for HTTP requests - :type thread_count: int - :param byte_limit: The size event list after reaching which queued events will be flushed - :type byte_limit: int | None + :param endpoint: The collector URL. Don't include "http://" - this is done automatically. + :type endpoint: string + :param protocol: The protocol to use - http or https. Defaults to http. + :type protocol: protocol + :param port: The collector port to connect to + :type port: int | None + :param method: The HTTP request method + :type method: method + :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :type buffer_size: int | None + :param on_success: Callback executed after every HTTP request in a flush has status code 200 + Gets passed the number of events flushed. 
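A brief sketch of the asynchronous variant using the parameters in this signature; the endpoint and thread count are placeholders:

from snowplow_tracker import AsyncEmitter

# Worker threads pull queued batches and send them in the background.
async_emitter = AsyncEmitter(
    "collector.example.com",  # placeholder collector endpoint
    protocol="https",
    method="post",
    thread_count=3,
    buffer_size=10,
)

# sync_flush() is the blocking alternative to flush(), useful before shutdown.
async_emitter.sync_flush()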
+ :type on_success: function | None + :param on_failure: Callback executed if at least one HTTP request in a flush has status code other than 200 + Gets passed two arguments: + 1) The number of events which were successfully sent + 2) If method is "post": The unsent data in string form; + If method is "get": An array of dictionaries corresponding to the unsent events' payloads + :type on_failure: function | None + :param thread_count: Number of worker threads to use for HTTP requests + :type thread_count: int + :param byte_limit: The size event list after reaching which queued events will be flushed + :type byte_limit: int | None """ - super(AsyncEmitter, self).__init__(endpoint, protocol, port, method, buffer_size, on_success, on_failure, byte_limit) + super(AsyncEmitter, self).__init__( + endpoint, + protocol, + port, + method, + buffer_size, + on_success, + on_failure, + byte_limit, + ) self.queue = Queue() for i in range(thread_count): t = threading.Thread(target=self.consume) @@ -379,8 +410,8 @@ def sync_flush(self) -> None: def flush(self) -> None: """ - Removes all dead threads, then creates a new thread which - executes the flush method of the base Emitter class + Removes all dead threads, then creates a new thread which + executes the flush method of the base Emitter class """ with self.lock: self.queue.put(self.buffer) diff --git a/snowplow_tracker/payload.py b/snowplow_tracker/payload.py index bb47a1d6..aacc6243 100644 --- a/snowplow_tracker/payload.py +++ b/snowplow_tracker/payload.py @@ -26,10 +26,9 @@ class Payload: - def __init__(self, dict_: Optional[PayloadDict] = None) -> None: """ - Constructor + Constructor """ self.nv_pairs = {} @@ -44,41 +43,42 @@ def __init__(self, dict_: Optional[PayloadDict] = None) -> None: def add(self, name: str, value: Any) -> None: """ - Add a name value pair to the Payload object + Add a name value pair to the Payload object """ if not (value == "" or value is None): self.nv_pairs[name] = value def add_dict(self, dict_: PayloadDict, base64: bool = False) -> None: """ - Add a dict of name value pairs to the Payload object + Add a dict of name value pairs to the Payload object - :param dict_: Dictionary to be added to the Payload - :type dict_: dict(string:\\*) + :param dict_: Dictionary to be added to the Payload + :type dict_: dict(string:\\*) """ for f in dict_: self.add(f, dict_[f]) def add_json( - self, - dict_: Optional[PayloadDict], - encode_base64: bool, - type_when_encoded: str, - type_when_not_encoded: str, - json_encoder: Optional[JsonEncoderFunction] = None) -> None: + self, + dict_: Optional[PayloadDict], + encode_base64: bool, + type_when_encoded: str, + type_when_not_encoded: str, + json_encoder: Optional[JsonEncoderFunction] = None, + ) -> None: """ - Add an encoded or unencoded JSON to the payload - - :param dict_: Custom context for the event - :type dict_: dict(string:\\*) | None - :param encode_base64: If the payload is base64 encoded - :type encode_base64: bool - :param type_when_encoded: Name of the field when encode_base64 is set - :type type_when_encoded: string - :param type_when_not_encoded: Name of the field when encode_base64 is not set - :type type_when_not_encoded: string - :param json_encoder: Custom JSON serializer that gets called on non-serializable object - :type json_encoder: function | None + Add an encoded or unencoded JSON to the payload + + :param dict_: Custom context for the event + :type dict_: dict(string:\\*) | None + :param encode_base64: If the payload is base64 encoded + :type encode_base64: bool 
+ :param type_when_encoded: Name of the field when encode_base64 is set + :type type_when_encoded: string + :param type_when_not_encoded: Name of the field when encode_base64 is not set + :type type_when_not_encoded: string + :param json_encoder: Custom JSON serializer that gets called on non-serializable object + :type json_encoder: function | None """ if dict_ is not None and dict_ != {}: @@ -96,6 +96,6 @@ def add_json( def get(self) -> PayloadDict: """ - Returns the context dictionary from the Payload object + Returns the context dictionary from the Payload object """ return self.nv_pairs diff --git a/snowplow_tracker/self_describing_json.py b/snowplow_tracker/self_describing_json.py index 84b49c94..538408ca 100644 --- a/snowplow_tracker/self_describing_json.py +++ b/snowplow_tracker/self_describing_json.py @@ -26,16 +26,12 @@ class SelfDescribingJson(object): - def __init__(self, schema: str, data: Union[PayloadDict, PayloadDictList]) -> None: self.schema = schema self.data = data def to_json(self) -> PayloadDict: - return { - "schema": self.schema, - "data": self.data - } + return {"schema": self.schema, "data": self.data} def to_string(self) -> str: return json.dumps(self.to_json()) diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py new file mode 100644 index 00000000..7b08c071 --- /dev/null +++ b/snowplow_tracker/snowplow.py @@ -0,0 +1,161 @@ +# """ +# snowplow.py + +# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. + +# Authors: Jack Keene, Anuj More, Alex Dean, Fred Blundun, Paul Boocock +# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd +# License: Apache License Version 2.0 +# """ +import logging +from typing import Optional +from snowplow_tracker import ( + Tracker, + Emitter, + subject, + EmitterConfiguration, + TrackerConfiguration, +) +from snowplow_tracker.typing import Method + +# Logging +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +""" +Snowplow Class +""" + + +class Snowplow: + _trackers = {} + + @staticmethod + def create_tracker( + namespace: str, + endpoint: str, + method: Method = "post", + app_id: Optional[str] = None, + subject: Optional[subject.Subject] = None, + tracker_config: TrackerConfiguration = TrackerConfiguration(), + emitter_config: EmitterConfiguration = EmitterConfiguration(), + ) -> Tracker: + """ + Create a Snowplow tracker with a namespace and collector URL + + :param namespace: Name of the tracker + :type namespace: String + :param endpoint: The collector URL + :type endpoint: String + :param method: The HTTP request method. Defaults to post. 
+ :type method: method + :param appId: Application ID + :type appId: String | None + :param subject: Subject to be tracked + :type subject: Subject | None + :param tracker_config: Tracker configuration + :type tracker_config: TrackerConfiguration + :param emitter_config: Emitter configuration + :type emitter_config: EmitterConfiguration + :rtype Tracker + """ + if endpoint is None: + raise TypeError("Emitter or Collector URL must be provided") + + emitter = Emitter( + endpoint, + method=method, + buffer_size=emitter_config.buffer_size, + on_success=emitter_config.on_success, + on_failure=emitter_config.on_failure, + byte_limit=emitter_config.byte_limit, + request_timeout=emitter_config.request_timeout, + ) + + tracker = Tracker( + emitter, + namespace=namespace, + app_id=app_id, + subject=subject, + encode_base64=tracker_config.encode_base64, + json_encoder=tracker_config.json_encoder, + ) + + return Snowplow.add_tracker(tracker) + + @classmethod + def add_tracker(cls, tracker: Tracker) -> Tracker: + """ + Add a Snowplow tracker to the Snowplow object + + :param tracker: Tracker object to add to Snowplow + :type tracker: Tracker + :rtype Tracker + """ + if not isinstance(tracker, Tracker): + logger.info("Tracker not provided.") + return None + + namespace = tracker.get_namespace() + + if namespace in cls._trackers.keys(): + raise TypeError("Tracker with this namespace already exists") + + cls._trackers[namespace] = tracker + logger.info("Tracker with namespace: '" + namespace + "' added to Snowplow") + return cls._trackers[namespace] + + @classmethod + def remove_tracker(cls, tracker: Tracker): + """ + Remove a Snowplow tracker from the Snowplow object if it exists + + :param tracker: Tracker object to remove from Snowplow + :type tracker: Tracker | None + """ + namespace = tracker.get_namespace() + cls.remove_tracker_by_namespace(namespace) + + @classmethod + def remove_tracker_by_namespace(cls, namespace: str): + """ + Remove a Snowplow tracker from the Snowplow object using it's namespace if it exists + + :param namespace: Tracker namespace to remove from Snowplow + :type tracker: String | None + """ + if not cls._trackers.pop(namespace, False): + logger.info("Tracker with namespace: '" + namespace + "' does not exist") + return + logger.info("Tracker with namespace: '" + namespace + "' removed from Snowplow") + + @classmethod + def reset(cls): + """ + Remove all active Snowplow trackers from the Snowplow object + """ + cls._trackers = {} + + @classmethod + def get_tracker(cls, namespace: str) -> Tracker: + """ + Returns a Snowplow tracker from the Snowplow object if it exists + :param namespace: Snowplow tracker namespace + :type namespace: string + :rtype: Tracker + """ + if namespace in cls._trackers.keys(): + return cls._trackers[namespace] + return None diff --git a/snowplow_tracker/subject.py b/snowplow_tracker/subject.py index 3b4fb82c..b5f3de83 100644 --- a/snowplow_tracker/subject.py +++ b/snowplow_tracker/subject.py @@ -27,41 +27,42 @@ class Subject(object): """ - Class for an event subject, where we view events as of the form + Class for an event subject, where we view events as of the form - (Subject) -> (Verb) -> (Object) + (Subject) -> (Verb) -> (Object) """ + def __init__(self) -> None: self.standard_nv_pairs = {"p": DEFAULT_PLATFORM} - def set_platform(self, value: SupportedPlatform) -> 'Subject': + def set_platform(self, value: SupportedPlatform) -> "Subject": """ - :param value: One of ["pc", "tv", "mob", "cnsl", "iot", "web", "srv", "app"] - :type value: 
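A usage sketch for this Snowplow interface, assuming a placeholder collector URL, namespace and application ID, and leaving TrackerConfiguration at its defaults:

from snowplow_tracker import Snowplow, EmitterConfiguration

tracker = Snowplow.create_tracker(
    namespace="store-frontend",
    endpoint="https://collector.example.com",
    app_id="example-shop",
    emitter_config=EmitterConfiguration(buffer_size=10),
)

# Trackers are registered by namespace, so they can be looked up or removed later.
assert Snowplow.get_tracker("store-frontend") is tracker

tracker.track_page_view("https://www.example.com/home", "Home page")

Snowplow.remove_tracker_by_namespace("store-frontend")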
supported_platform - :rtype: subject + :param value: One of ["pc", "tv", "mob", "cnsl", "iot", "web", "srv", "app"] + :type value: supported_platform + :rtype: subject """ one_of(value, SUPPORTED_PLATFORMS) self.standard_nv_pairs["p"] = value return self - def set_user_id(self, user_id: str) -> 'Subject': + def set_user_id(self, user_id: str) -> "Subject": """ - :param user_id: User ID - :type user_id: string - :rtype: subject + :param user_id: User ID + :type user_id: string + :rtype: subject """ self.standard_nv_pairs["uid"] = user_id return self - def set_screen_resolution(self, width: int, height: int) -> 'Subject': + def set_screen_resolution(self, width: int, height: int) -> "Subject": """ - :param width: Width of the screen - :param height: Height of the screen - :type width: int,>0 - :type height: int,>0 - :rtype: subject + :param width: Width of the screen + :param height: Height of the screen + :type width: int,>0 + :type height: int,>0 + :rtype: subject """ greater_than(width, 0) greater_than(height, 0) @@ -69,13 +70,13 @@ def set_screen_resolution(self, width: int, height: int) -> 'Subject': self.standard_nv_pairs["res"] = "".join([str(width), "x", str(height)]) return self - def set_viewport(self, width: int, height: int) -> 'Subject': + def set_viewport(self, width: int, height: int) -> "Subject": """ - :param width: Width of the viewport - :param height: Height of the viewport - :type width: int,>0 - :type height: int,>0 - :rtype: subject + :param width: Width of the viewport + :param height: Height of the viewport + :type width: int,>0 + :type height: int,>0 + :rtype: subject """ greater_than(width, 0) greater_than(height, 0) @@ -83,96 +84,96 @@ def set_viewport(self, width: int, height: int) -> 'Subject': self.standard_nv_pairs["vp"] = "".join([str(width), "x", str(height)]) return self - def set_color_depth(self, depth: int) -> 'Subject': + def set_color_depth(self, depth: int) -> "Subject": """ - :param depth: Depth of the color on the screen - :type depth: int - :rtype: subject + :param depth: Depth of the color on the screen + :type depth: int + :rtype: subject """ self.standard_nv_pairs["cd"] = depth return self - def set_timezone(self, timezone: str) -> 'Subject': + def set_timezone(self, timezone: str) -> "Subject": """ - :param timezone: Timezone as a string - :type timezone: string - :rtype: subject + :param timezone: Timezone as a string + :type timezone: string + :rtype: subject """ self.standard_nv_pairs["tz"] = timezone return self - def set_lang(self, lang: str) -> 'Subject': + def set_lang(self, lang: str) -> "Subject": """ - Set language. + Set language. 
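Each setter below returns the Subject itself, so per-user properties can be chained; the values here are illustrative, and the resulting Subject could be passed to Snowplow.create_tracker, a Tracker, or an individual event:

from snowplow_tracker.subject import Subject

event_subject = (
    Subject()
    .set_platform("mob")                # must be one of the supported platforms
    .set_user_id("user-12345")
    .set_lang("en")
    .set_screen_resolution(1080, 1920)  # both dimensions must be greater than 0
)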
- :param lang: Language the application is set to - :type lang: string - :rtype: subject + :param lang: Language the application is set to + :type lang: string + :rtype: subject """ self.standard_nv_pairs["lang"] = lang return self - def set_domain_user_id(self, duid: str) -> 'Subject': + def set_domain_user_id(self, duid: str) -> "Subject": """ - Set the domain user ID + Set the domain user ID - :param duid: Domain user ID - :type duid: string - :rtype: subject + :param duid: Domain user ID + :type duid: string + :rtype: subject """ self.standard_nv_pairs["duid"] = duid return self - def set_domain_session_id(self, sid: str) -> 'Subject': + def set_domain_session_id(self, sid: str) -> "Subject": """ - Set the domain session ID - :param sid: Domain session ID - :type sid: string - :rtype: subject + Set the domain session ID + :param sid: Domain session ID + :type sid: string + :rtype: subject """ self.standard_nv_pairs["sid"] = sid return self - def set_domain_session_index(self, vid: int) -> 'Subject': + def set_domain_session_index(self, vid: int) -> "Subject": """ - Set the domain session Index - :param vid: Domain session Index - :type vid: int - :rtype: subject + Set the domain session Index + :param vid: Domain session Index + :type vid: int + :rtype: subject """ self.standard_nv_pairs["vid"] = vid return self - def set_ip_address(self, ip: str) -> 'Subject': + def set_ip_address(self, ip: str) -> "Subject": """ - Set the domain user ID + Set the domain user ID - :param ip: IP address - :type ip: string - :rtype: subject + :param ip: IP address + :type ip: string + :rtype: subject """ self.standard_nv_pairs["ip"] = ip return self - def set_useragent(self, ua: str) -> 'Subject': + def set_useragent(self, ua: str) -> "Subject": """ - Set the user agent + Set the user agent - :param ua: User agent - :type ua: string - :rtype: subject + :param ua: User agent + :type ua: string + :rtype: subject """ self.standard_nv_pairs["ua"] = ua return self - def set_network_user_id(self, nuid: str) -> 'Subject': + def set_network_user_id(self, nuid: str) -> "Subject": """ - Set the network user ID field - This overwrites the nuid field set by the collector + Set the network user ID field + This overwrites the nuid field set by the collector - :param nuid: Network user ID - :type nuid: string - :rtype: subject + :param nuid: Network user ID + :type nuid: string + :rtype: subject """ self.standard_nv_pairs["tnuid"] = nuid return self diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index 16b89d8d..4c988fc8 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -26,8 +26,15 @@ from snowplow_tracker import payload, _version, SelfDescribingJson from snowplow_tracker import subject as _subject from snowplow_tracker.contracts import non_empty_string, one_of, non_empty, form_element -from snowplow_tracker.typing import JsonEncoderFunction, EmitterProtocol,\ - FORM_NODE_NAMES, FORM_TYPES, FormNodeName, ElementClasses, FormClasses +from snowplow_tracker.typing import ( + JsonEncoderFunction, + EmitterProtocol, + FORM_NODE_NAMES, + FORM_TYPES, + FormNodeName, + ElementClasses, + FormClasses, +) """ Constants & config @@ -47,28 +54,28 @@ class Tracker: - def __init__( - self, - emitters: Union[List[EmitterProtocol], EmitterProtocol], - subject: Optional[_subject.Subject] = None, - namespace: Optional[str] = None, - app_id: Optional[str] = None, - encode_base64: bool = DEFAULT_ENCODE_BASE64, - json_encoder: Optional[JsonEncoderFunction] = None) -> None: - """ - :param 
emitters: Emitters to which events will be sent - :type emitters: list[>0](emitter) | emitter - :param subject: Subject to be tracked - :type subject: subject | None - :param namespace: Identifier for the Tracker instance - :type namespace: string_or_none - :param app_id: Application ID - :type app_id: string_or_none - :param encode_base64: Whether JSONs in the payload should be base-64 encoded - :type encode_base64: bool - :param json_encoder: Custom JSON serializer that gets called on non-serializable object - :type json_encoder: function | None + self, + emitters: Union[List[EmitterProtocol], EmitterProtocol], + subject: Optional[_subject.Subject] = None, + namespace: Optional[str] = None, + app_id: Optional[str] = None, + encode_base64: bool = DEFAULT_ENCODE_BASE64, + json_encoder: Optional[JsonEncoderFunction] = None, + ) -> None: + """ + :param emitters: Emitters to which events will be sent + :type emitters: list[>0](emitter) | emitter + :param subject: Subject to be tracked + :type subject: subject | None + :param namespace: Identifier for the Tracker instance + :type namespace: string_or_none + :param app_id: Application ID + :type app_id: string_or_none + :param encode_base64: Whether JSONs in the payload should be base-64 encoded + :type encode_base64: bool + :param json_encoder: Custom JSON serializer that gets called on non-serializable object + :type json_encoder: function | None """ if subject is None: subject = _subject.Subject() @@ -83,31 +90,33 @@ def __init__( self.encode_base64 = encode_base64 self.json_encoder = json_encoder - self.standard_nv_pairs = { - "tv": VERSION, - "tna": namespace, - "aid": app_id - } + self.standard_nv_pairs = {"tv": VERSION, "tna": namespace, "aid": app_id} self.timer = None @staticmethod def get_uuid() -> str: """ - Set transaction ID for the payload once during the lifetime of the - event. + Set transaction ID for the payload once during the lifetime of the + event. - :rtype: string + :rtype: string """ return str(uuid.uuid4()) @staticmethod def get_timestamp(tstamp: Optional[float] = None) -> int: """ - :param tstamp: User-input timestamp or None - :type tstamp: int | float | None - :rtype: int - """ - if isinstance(tstamp, (int, float, )): + :param tstamp: User-input timestamp or None + :type tstamp: int | float | None + :rtype: int + """ + if isinstance( + tstamp, + ( + int, + float, + ), + ): return int(tstamp) return int(time.time() * 1000) @@ -115,37 +124,38 @@ def get_timestamp(tstamp: Optional[float] = None) -> int: Tracking methods """ - def track(self, pb: payload.Payload) -> 'Tracker': + def track(self, pb: payload.Payload) -> "Tracker": """ - Send the payload to a emitter + Send the payload to a emitter - :param pb: Payload builder - :type pb: payload - :rtype: tracker + :param pb: Payload builder + :type pb: payload + :rtype: tracker """ for emitter in self.emitters: emitter.input(pb.nv_pairs) return self def complete_payload( - self, - pb: payload.Payload, - context: Optional[List[SelfDescribingJson]], - tstamp: Optional[float], - event_subject: Optional[_subject.Subject]) -> 'Tracker': - """ - Called by all tracking events to add the standard name-value pairs - to the Payload object irrespective of the tracked event. 
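For comparison with the Snowplow interface, a sketch of wiring a Tracker up directly; the signature above accepts either a single emitter or a list of emitters, and the endpoint is a placeholder:

from snowplow_tracker import Emitter, Tracker

emitter = Emitter("collector.example.com", protocol="https", method="post")

tracker = Tracker(
    emitter,                  # a list such as [emitter_a, emitter_b] also works
    namespace="direct-tracker",
    app_id="example-app",
    encode_base64=True,       # base-64 encode JSONs in the payload
)

# Timestamps default to "now" in milliseconds unless one is supplied.
print(Tracker.get_timestamp())        # current epoch time in ms
print(Tracker.get_timestamp(1500.0))  # -> 1500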
- - :param pb: Payload builder - :type pb: payload - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + pb: payload.Payload, + context: Optional[List[SelfDescribingJson]], + tstamp: Optional[float], + event_subject: Optional[_subject.Subject], + ) -> "Tracker": + """ + Called by all tracking events to add the standard name-value pairs + to the Payload object irrespective of the tracked event. + + :param pb: Payload builder + :type pb: payload + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ pb.add("eid", Tracker.get_uuid()) @@ -155,8 +165,12 @@ def complete_payload( if context is not None: context_jsons = list(map(lambda c: c.to_json(), context)) - context_envelope = SelfDescribingJson(CONTEXT_SCHEMA, context_jsons).to_json() - pb.add_json(context_envelope, self.encode_base64, "cx", "co", self.json_encoder) + context_envelope = SelfDescribingJson( + CONTEXT_SCHEMA, context_jsons + ).to_json() + pb.add_json( + context_envelope, self.encode_base64, "cx", "co", self.json_encoder + ) pb.add_dict(self.standard_nv_pairs) @@ -166,32 +180,33 @@ def complete_payload( return self.track(pb) def track_page_view( - self, - page_url: str, - page_title: Optional[str] = None, - referrer: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param page_url: URL of the viewed page - :type page_url: non_empty_string - :param page_title: Title of the viewed page - :type page_title: string_or_none - :param referrer: Referrer of the page - :type referrer: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + page_url: str, + page_title: Optional[str] = None, + referrer: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param page_url: URL of the viewed page + :type page_url: non_empty_string + :param page_title: Title of the viewed page + :type page_title: string_or_none + :param referrer: Referrer of the page + :type referrer: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(page_url) pb = payload.Payload() - pb.add("e", "pv") # pv: page view + pb.add("e", "pv") # pv: page view pb.add("url", page_url) pb.add("page", page_title) pb.add("refr", referrer) @@ -199,44 +214,45 @@ def track_page_view( return self.complete_payload(pb, context, tstamp, event_subject) def track_page_ping( - self, - page_url: str, - page_title: Optional[str] = None, - referrer: 
Optional[str] = None, - min_x: Optional[int] = None, - max_x: Optional[int] = None, - min_y: Optional[int] = None, - max_y: Optional[int] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param page_url: URL of the viewed page - :type page_url: non_empty_string - :param page_title: Title of the viewed page - :type page_title: string_or_none - :param referrer: Referrer of the page - :type referrer: string_or_none - :param min_x: Minimum page x offset seen in the last ping period - :type min_x: int | None - :param max_x: Maximum page x offset seen in the last ping period - :type max_x: int | None - :param min_y: Minimum page y offset seen in the last ping period - :type min_y: int | None - :param max_y: Maximum page y offset seen in the last ping period - :type max_y: int | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + page_url: str, + page_title: Optional[str] = None, + referrer: Optional[str] = None, + min_x: Optional[int] = None, + max_x: Optional[int] = None, + min_y: Optional[int] = None, + max_y: Optional[int] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param page_url: URL of the viewed page + :type page_url: non_empty_string + :param page_title: Title of the viewed page + :type page_title: string_or_none + :param referrer: Referrer of the page + :type referrer: string_or_none + :param min_x: Minimum page x offset seen in the last ping period + :type min_x: int | None + :param max_x: Maximum page x offset seen in the last ping period + :type max_x: int | None + :param min_y: Minimum page y offset seen in the last ping period + :type min_y: int | None + :param max_y: Maximum page y offset seen in the last ping period + :type max_y: int | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(page_url) pb = payload.Payload() - pb.add("e", "pp") # pp: page ping + pb.add("e", "pp") # pp: page ping pb.add("url", page_url) pb.add("page", page_title) pb.add("refr", referrer) @@ -248,33 +264,34 @@ def track_page_ping( return self.complete_payload(pb, context, tstamp, event_subject) def track_link_click( - self, - target_url: str, - element_id: Optional[str] = None, - element_classes: Optional[ElementClasses] = None, - element_target: Optional[str] = None, - element_content: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param target_url: Target URL of the link - :type target_url: non_empty_string - :param element_id: ID attribute of the HTML element - :type element_id: string_or_none - :param element_classes: Classes of the HTML element - :type element_classes: list(str) | tuple(str,\\*) | None - :param element_target: ID attribute of the HTML element - :type element_target: string_or_none - :param 
element_content: The content of the HTML element - :type element_content: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + target_url: str, + element_id: Optional[str] = None, + element_classes: Optional[ElementClasses] = None, + element_target: Optional[str] = None, + element_content: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param target_url: Target URL of the link + :type target_url: non_empty_string + :param element_id: ID attribute of the HTML element + :type element_id: string_or_none + :param element_classes: Classes of the HTML element + :type element_classes: list(str) | tuple(str,\\*) | None + :param element_target: ID attribute of the HTML element + :type element_target: string_or_none + :param element_content: The content of the HTML element + :type element_content: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(target_url) @@ -289,41 +306,44 @@ def track_link_click( if element_content is not None: properties["elementContent"] = element_content - event_json = SelfDescribingJson("%s/link_click/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/link_click/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) return self.track_unstruct_event(event_json, context, tstamp, event_subject) def track_add_to_cart( - self, - sku: str, - quantity: int, - name: Optional[str] = None, - category: Optional[str] = None, - unit_price: Optional[float] = None, - currency: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param sku: Item SKU or ID - :type sku: non_empty_string - :param quantity: Number added to cart - :type quantity: int - :param name: Item's name - :type name: string_or_none - :param category: Item's category - :type category: string_or_none - :param unit_price: Item's price - :type unit_price: int | float | None - :param currency: Type of currency the price is in - :type currency: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + sku: str, + quantity: int, + name: Optional[str] = None, + category: Optional[str] = None, + unit_price: Optional[float] = None, + currency: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param sku: Item SKU or ID + :type sku: non_empty_string + :param quantity: Number added to cart + :type quantity: int + :param name: Item's name + :type name: string_or_none + :param category: Item's category + :type 
category: string_or_none + :param unit_price: Item's price + :type unit_price: int | float | None + :param currency: Type of currency the price is in + :type currency: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(sku) @@ -339,41 +359,44 @@ def track_add_to_cart( if currency is not None: properties["currency"] = currency - event_json = SelfDescribingJson("%s/add_to_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/add_to_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) return self.track_unstruct_event(event_json, context, tstamp, event_subject) def track_remove_from_cart( - self, - sku: str, - quantity: int, - name: Optional[str] = None, - category: Optional[str] = None, - unit_price: Optional[float] = None, - currency: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param sku: Item SKU or ID - :type sku: non_empty_string - :param quantity: Number added to cart - :type quantity: int - :param name: Item's name - :type name: string_or_none - :param category: Item's category - :type category: string_or_none - :param unit_price: Item's price - :type unit_price: int | float | None - :param currency: Type of currency the price is in - :type currency: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + sku: str, + quantity: int, + name: Optional[str] = None, + category: Optional[str] = None, + unit_price: Optional[float] = None, + currency: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param sku: Item SKU or ID + :type sku: non_empty_string + :param quantity: Number added to cart + :type quantity: int + :param name: Item's name + :type name: string_or_none + :param category: Item's category + :type category: string_or_none + :param unit_price: Item's price + :type unit_price: int | float | None + :param currency: Type of currency the price is in + :type currency: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(sku) @@ -389,41 +412,44 @@ def track_remove_from_cart( if currency is not None: properties["currency"] = currency - event_json = SelfDescribingJson("%s/remove_from_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/remove_from_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) return self.track_unstruct_event(event_json, context, tstamp, event_subject) def track_form_change( - self, - form_id: str, - element_id: Optional[str], - node_name: FormNodeName, - value: Optional[str], - type_: 
Optional[str] = None, - element_classes: Optional[ElementClasses] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param form_id: ID attribute of the HTML form - :type form_id: non_empty_string - :param element_id: ID attribute of the HTML element - :type element_id: string_or_none - :param node_name: Type of input element - :type node_name: form_node_name - :param value: Value of the input element - :type value: string_or_none - :param type_: Type of data the element represents - :type type_: non_empty_string, form_type - :param element_classes: Classes of the HTML element - :type element_classes: list(str) | tuple(str,\\*) | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + form_id: str, + element_id: Optional[str], + node_name: FormNodeName, + value: Optional[str], + type_: Optional[str] = None, + element_classes: Optional[ElementClasses] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param form_id: ID attribute of the HTML form + :type form_id: non_empty_string + :param element_id: ID attribute of the HTML element + :type element_id: string_or_none + :param node_name: Type of input element + :type node_name: form_node_name + :param value: Value of the input element + :type value: string_or_none + :param type_: Type of data the element represents + :type type_: non_empty_string, form_type + :param element_classes: Classes of the HTML element + :type element_classes: list(str) | tuple(str,\\*) | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(form_id) one_of(node_name, FORM_NODE_NAMES) @@ -440,73 +466,79 @@ def track_form_change( if element_classes is not None: properties["elementClasses"] = element_classes - event_json = SelfDescribingJson("%s/change_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/change_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) return self.track_unstruct_event(event_json, context, tstamp, event_subject) def track_form_submit( - self, - form_id: str, - form_classes: Optional[FormClasses] = None, - elements: Optional[List[Dict[str, Any]]] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param form_id: ID attribute of the HTML form - :type form_id: non_empty_string - :param form_classes: Classes of the HTML form - :type form_classes: list(str) | tuple(str,\\*) | None - :param elements: Classes of the HTML form - :type elements: list(form_element) | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: 
tracker + self, + form_id: str, + form_classes: Optional[FormClasses] = None, + elements: Optional[List[Dict[str, Any]]] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param form_id: ID attribute of the HTML form + :type form_id: non_empty_string + :param form_classes: Classes of the HTML form + :type form_classes: list(str) | tuple(str,\\*) | None + :param elements: Classes of the HTML form + :type elements: list(form_element) | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(form_id) for element in elements or []: form_element(element) properties = dict() - properties['formId'] = form_id + properties["formId"] = form_id if form_classes is not None: - properties['formClasses'] = form_classes + properties["formClasses"] = form_classes if elements is not None and len(elements) > 0: - properties['elements'] = elements + properties["elements"] = elements - event_json = SelfDescribingJson("%s/submit_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/submit_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) return self.track_unstruct_event(event_json, context, tstamp, event_subject) def track_site_search( - self, - terms: Sequence[str], - filters: Optional[Dict[str, Union[str, bool]]] = None, - total_results: Optional[int] = None, - page_results: Optional[int] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param terms: Search terms - :type terms: seq[>=1](str) - :param filters: Filters applied to the search - :type filters: dict(str:str|bool) | None - :param total_results: Total number of results returned - :type total_results: int | None - :param page_results: Total number of pages of results - :type page_results: int | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + terms: Sequence[str], + filters: Optional[Dict[str, Union[str, bool]]] = None, + total_results: Optional[int] = None, + page_results: Optional[int] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param terms: Search terms + :type terms: seq[>=1](str) + :param filters: Filters applied to the search + :type filters: dict(str:str|bool) | None + :param total_results: Total number of results returned + :type total_results: int | None + :param page_results: Total number of pages of results + :type page_results: int | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty(terms) @@ -519,47 +551,50 @@ def track_site_search( if page_results is not None: 
properties["pageResults"] = page_results - event_json = SelfDescribingJson("%s/site_search/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties) + event_json = SelfDescribingJson( + "%s/site_search/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties + ) return self.track_unstruct_event(event_json, context, tstamp, event_subject) def track_ecommerce_transaction_item( - self, - order_id: str, - sku: str, - price: float, - quantity: int, - name: Optional[str] = None, - category: Optional[str] = None, - currency: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - This is an internal method called by track_ecommerce_transaction. - It is not for public use. - - :param order_id: Order ID - :type order_id: non_empty_string - :param sku: Item SKU - :type sku: non_empty_string - :param price: Item price - :type price: int | float - :param quantity: Item quantity - :type quantity: int - :param name: Item name - :type name: string_or_none - :param category: Item category - :type category: string_or_none - :param currency: The currency the price is expressed in - :type currency: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + order_id: str, + sku: str, + price: float, + quantity: int, + name: Optional[str] = None, + category: Optional[str] = None, + currency: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + This is an internal method called by track_ecommerce_transaction. + It is not for public use. 
+ + :param order_id: Order ID + :type order_id: non_empty_string + :param sku: Item SKU + :type sku: non_empty_string + :param price: Item price + :type price: int | float + :param quantity: Item quantity + :type quantity: int + :param name: Item name + :type name: string_or_none + :param category: Item category + :type category: string_or_none + :param currency: The currency the price is expressed in + :type currency: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(order_id) non_empty_string(sku) @@ -577,48 +612,49 @@ def track_ecommerce_transaction_item( return self.complete_payload(pb, context, tstamp, event_subject) def track_ecommerce_transaction( - self, - order_id: str, - total_value: float, - affiliation: Optional[str] = None, - tax_value: Optional[float] = None, - shipping: Optional[float] = None, - city: Optional[str] = None, - state: Optional[str] = None, - country: Optional[str] = None, - currency: Optional[str] = None, - items: Optional[List[Dict[str, Any]]] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param order_id: ID of the eCommerce transaction - :type order_id: non_empty_string - :param total_value: Total transaction value - :type total_value: int | float - :param affiliation: Transaction affiliation - :type affiliation: string_or_none - :param tax_value: Transaction tax value - :type tax_value: int | float | None - :param shipping: Delivery cost charged - :type shipping: int | float | None - :param city: Delivery address city - :type city: string_or_none - :param state: Delivery address state - :type state: string_or_none - :param country: Delivery address country - :type country: string_or_none - :param currency: The currency the price is expressed in - :type currency: string_or_none - :param items: The items in the transaction - :type items: list(dict(str:\\*)) | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + order_id: str, + total_value: float, + affiliation: Optional[str] = None, + tax_value: Optional[float] = None, + shipping: Optional[float] = None, + city: Optional[str] = None, + state: Optional[str] = None, + country: Optional[str] = None, + currency: Optional[str] = None, + items: Optional[List[Dict[str, Any]]] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param order_id: ID of the eCommerce transaction + :type order_id: non_empty_string + :param total_value: Total transaction value + :type total_value: int | float + :param affiliation: Transaction affiliation + :type affiliation: string_or_none + :param tax_value: Transaction tax value + :type tax_value: int | float | None + :param shipping: Delivery cost charged + :type shipping: int | float | None + :param city: Delivery address city + :type city: string_or_none + :param state: Delivery address state + :type state: string_or_none + :param 
country: Delivery address country + :type country: string_or_none + :param currency: The currency the price is expressed in + :type currency: string_or_none + :param items: The items in the transaction + :type items: list(dict(str:\\*)) | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(order_id) @@ -650,24 +686,25 @@ def track_ecommerce_transaction( return self def track_screen_view( - self, - name: Optional[str] = None, - id_: Optional[str] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param name: The name of the screen view event - :type name: string_or_none - :param id_: Screen view ID - :type id_: string_or_none - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + name: Optional[str] = None, + id_: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param name: The name of the screen view event + :type name: string_or_none + :param id_: Screen view ID + :type id_: string_or_none + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ screen_view_properties = {} if name is not None: @@ -675,40 +712,44 @@ def track_screen_view( if id_ is not None: screen_view_properties["id"] = id_ - event_json = SelfDescribingJson("%s/screen_view/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), screen_view_properties) + event_json = SelfDescribingJson( + "%s/screen_view/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), + screen_view_properties, + ) return self.track_unstruct_event(event_json, context, tstamp, event_subject) def track_struct_event( - self, - category: str, - action: str, - label: Optional[str] = None, - property_: Optional[str] = None, - value: Optional[float] = None, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param category: Category of the event - :type category: non_empty_string - :param action: The event itself - :type action: non_empty_string - :param label: Refer to the object the action is - performed on - :type label: string_or_none - :param property_: Property associated with either the action - or the object - :type property_: string_or_none - :param value: A value associated with the user action - :type value: int | float | None - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker + self, + category: str, + action: str, + label: Optional[str] = None, + 
property_: Optional[str] = None, + value: Optional[float] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param category: Category of the event + :type category: non_empty_string + :param action: The event itself + :type action: non_empty_string + :param label: Refer to the object the action is + performed on + :type label: string_or_none + :param property_: Property associated with either the action + or the object + :type property_: string_or_none + :param value: A value associated with the user action + :type value: int | float | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker """ non_empty_string(category) non_empty_string(action) @@ -724,26 +765,29 @@ def track_struct_event( return self.complete_payload(pb, context, tstamp, event_subject) def track_unstruct_event( - self, - event_json: SelfDescribingJson, - context: Optional[List[SelfDescribingJson]] = None, - tstamp: Optional[float] = None, - event_subject: Optional[_subject.Subject] = None) -> 'Tracker': - """ - :param event_json: The properties of the event. Has two field: - A "data" field containing the event properties and - A "schema" field identifying the schema against which the data is validated - :type event_json: self_describing_json - :param context: Custom context for the event - :type context: context_array | None - :param tstamp: Optional event timestamp in milliseconds - :type tstamp: int | float | None - :param event_subject: Optional per event subject - :type event_subject: subject | None - :rtype: tracker - """ - - envelope = SelfDescribingJson(UNSTRUCT_EVENT_SCHEMA, event_json.to_json()).to_json() + self, + event_json: SelfDescribingJson, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param event_json: The properties of the event. Has two field: + A "data" field containing the event properties and + A "schema" field identifying the schema against which the data is validated + :type event_json: self_describing_json + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker + """ + + envelope = SelfDescribingJson( + UNSTRUCT_EVENT_SCHEMA, event_json.to_json() + ).to_json() pb = payload.Payload() @@ -755,41 +799,44 @@ def track_unstruct_event( # Alias track_self_describing_event = track_unstruct_event - def flush(self, is_async: bool = False) -> 'Tracker': + def flush(self, is_async: bool = False) -> "Tracker": """ - Flush the emitter + Flush the emitter - :param is_async: Whether the flush is done asynchronously. Default is False - :type is_async: bool - :rtype: tracker + :param is_async: Whether the flush is done asynchronously. 
Default is False + :type is_async: bool + :rtype: tracker """ for emitter in self.emitters: if is_async: - if hasattr(emitter, 'flush'): + if hasattr(emitter, "flush"): emitter.flush() else: - if hasattr(emitter, 'sync_flush'): + if hasattr(emitter, "sync_flush"): emitter.sync_flush() return self - def set_subject(self, subject: Optional[_subject.Subject]) -> 'Tracker': + def set_subject(self, subject: Optional[_subject.Subject]) -> "Tracker": """ - Set the subject of the events fired by the tracker + Set the subject of the events fired by the tracker - :param subject: Subject to be tracked - :type subject: subject | None - :rtype: tracker + :param subject: Subject to be tracked + :type subject: subject | None + :rtype: tracker """ self.subject = subject return self - def add_emitter(self, emitter: EmitterProtocol) -> 'Tracker': + def add_emitter(self, emitter: EmitterProtocol) -> "Tracker": """ - Add a new emitter to which events should be passed + Add a new emitter to which events should be passed - :param emitter: New emitter - :type emitter: emitter - :rtype: tracker + :param emitter: New emitter + :type emitter: emitter + :rtype: tracker """ self.emitters.append(emitter) return self + + def get_namespace(self): + return self.standard_nv_pairs["tna"] diff --git a/snowplow_tracker/tracker_configuration.py b/snowplow_tracker/tracker_configuration.py new file mode 100644 index 00000000..9c717721 --- /dev/null +++ b/snowplow_tracker/tracker_configuration.py @@ -0,0 +1,66 @@ +# """ +# tracker_configuration.py + +# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. + +# Authors: Jack Keene, Anuj More, Alex Dean, Fred Blundun, Paul Boocock +# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd +# License: Apache License Version 2.0 +# """ + +from typing import Optional +from snowplow_tracker.typing import JsonEncoderFunction + + +class TrackerConfiguration(object): + def __init__( + self, + encode_base64: Optional[bool] = None, + json_encoder: Optional[JsonEncoderFunction] = None, + ) -> None: + """ + Configuration for additional tracker configuration options. + :param encode_base64: Whether JSONs in the payload should be base-64 encoded. Default is True. + :type encode_base64: bool + :param json_encoder: Custom JSON serializer that gets called on non-serializable object. + :type json_encoder: function | None + """ + + self.encode_base64 = encode_base64 + self.json_encoder = json_encoder + + @property + def encode_base64(self) -> Optional[bool]: + """ + Whether JSONs in the payload should be base-64 encoded. Default is True. 
+ """ + return self._encode_base64 + + @encode_base64.setter + def encode_base64(self, value: Optional[bool]): + if isinstance(value, bool) or value is None: + self._encode_base64 = value + else: + raise ValueError("encode_base64 must be True or False") + + @property + def json_encoder(self) -> Optional[JsonEncoderFunction]: + """ + Custom JSON serializer that gets called on non-serializable object. + """ + return self._json_encoder + + @json_encoder.setter + def json_encoder(self, value: Optional[JsonEncoderFunction]): + self._json_encoder = value diff --git a/snowplow_tracker/typing.py b/snowplow_tracker/typing.py index 7800a657..db27a671 100644 --- a/snowplow_tracker/typing.py +++ b/snowplow_tracker/typing.py @@ -29,10 +29,29 @@ # tracker FORM_NODE_NAMES = {"INPUT", "TEXTAREA", "SELECT"} FORM_TYPES = { - "button", "checkbox", "color", "date", "datetime", - "datetime-local", "email", "file", "hidden", "image", "month", - "number", "password", "radio", "range", "reset", "search", - "submit", "tel", "text", "time", "url", "week" + "button", + "checkbox", + "color", + "date", + "datetime", + "datetime-local", + "email", + "file", + "hidden", + "image", + "month", + "number", + "password", + "radio", + "range", + "reset", + "search", + "submit", + "tel", + "text", + "time", + "url", + "week", } FormNodeName = Literal["INPUT", "TEXTAREA", "SELECT"] ElementClasses = Union[List[str], Tuple[str, Any]] From 578153b8e8fe88bb68a078a9fb614dbdcc9944f5 Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Fri, 18 Nov 2022 10:17:32 +0000 Subject: [PATCH 02/11] Add example app with Snowplow interface (close #302) PR #304 * Add example app with Snowplow interface --- examples/snowplow_app.py | 58 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 examples/snowplow_app.py diff --git a/examples/snowplow_app.py b/examples/snowplow_app.py new file mode 100644 index 00000000..39484450 --- /dev/null +++ b/examples/snowplow_app.py @@ -0,0 +1,58 @@ +import sys +from snowplow_tracker import ( + Snowplow, + EmitterConfiguration, + Subject, + TrackerConfiguration, + SelfDescribingJson, +) + + +def get_url_from_args(): + if len(sys.argv) != 2: + raise ValueError("Collector Endpoint is required") + return sys.argv[1] + + +def main(): + + collector_url = get_url_from_args() + # Configure Emitter + emitter_config = EmitterConfiguration(buffer_size=5) + + # Configure Tracker + tracker_config = TrackerConfiguration(encode_base64=True) + + # Initialise subject + subject = Subject() + subject.set_user_id("uid") + + Snowplow.create_tracker( + namespace="ns", + endpoint=collector_url, + app_id="app1", + subject=subject, + tracker_config=tracker_config, + emitter_config=emitter_config, + ) + + tracker = Snowplow.get_tracker("ns") + + tracker.track_page_view("https://www.snowplow.io", "Homepage") + tracker.track_page_ping("https://www.snowplow.io", "Homepage") + tracker.track_link_click("https://www.snowplow.io/about") + tracker.track_page_view("https://www.snowplow.io/about", "About") + + tracker.track_self_describing_event( + SelfDescribingJson( + "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", + {"targetUrl": "example.com"}, + ) + ) + tracker.track_struct_event("shop", "add-to-basket", None, "pcs", 2) + + tracker.flush() + + +if __name__ == "__main__": + main() From 71d2bfdc1592e44a8fb56a27658e348c28219a8e Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Tue, 29 Nov 2022 
15:10:52 +0000 Subject: [PATCH 03/11] Fix Collector URL with trailing '/' (close #300) PR #305 * Strip trailing slash character from endpoint --- snowplow_tracker/emitters.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 5ddb67c8..3d71b767 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -144,6 +144,8 @@ def as_collector_uri( if len(endpoint) < 1: raise ValueError("No endpoint provided.") + endpoint = endpoint.rstrip('/') + if endpoint.split("://")[0] in PROTOCOLS: endpoint_arr = endpoint.split("://") protocol = endpoint_arr[0] From d943ebcd162791510248ca99679a786f198b35ba Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Tue, 13 Dec 2022 10:53:30 +0000 Subject: [PATCH 04/11] Upgrade set-output in cd (close #294) PR #299 * Update deprecated set-output command --- .github/workflows/cd.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 10b24303..c028f253 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -21,8 +21,8 @@ jobs: - name: Get tag and tracker versions id: version run: | - echo ::set-output name=TAG_VERSION::${GITHUB_REF#refs/*/} - echo "##[set-output name=PYTHON_TRACKER_VERSION;]$(python setup.py --version)" + echo "TAG_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_OUTPUT + echo "PYTHON_TRACKER_VERSION=$(python setup.py --version)" >> $GITHUB_OUTPUT - name: Fail if version mismatch if: ${{ steps.version.outputs.TAG_VERSION != steps.version.outputs.PYTHON_TRACKER_VERSION }} From 39fd50a3aff98a5efdd5c5c7fb5518fe4761305b Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Fri, 6 Jan 2023 15:31:53 +0000 Subject: [PATCH 05/11] Add Retry for failed events (close #296) PR #306 * Make http requests return status code * Add retry functionality * Add failure retry tests * Add buffer capacity * Rename buffer_size to batch_size * Encapsulate timers in an FlushTimer class --- examples/snowplow_app.py | 2 +- snowplow_tracker/celery/celery_emitter.py | 4 +- snowplow_tracker/emitter_configuration.py | 40 +++- snowplow_tracker/emitters.py | 218 ++++++++++++++---- snowplow_tracker/snowplow.py | 2 +- .../test/integration/test_integration.py | 8 +- snowplow_tracker/test/unit/test_emitters.py | 141 ++++++++--- 7 files changed, 312 insertions(+), 103 deletions(-) diff --git a/examples/snowplow_app.py b/examples/snowplow_app.py index 39484450..7ce2c1d8 100644 --- a/examples/snowplow_app.py +++ b/examples/snowplow_app.py @@ -18,7 +18,7 @@ def main(): collector_url = get_url_from_args() # Configure Emitter - emitter_config = EmitterConfiguration(buffer_size=5) + emitter_config = EmitterConfiguration(batch_size=5) # Configure Tracker tracker_config = TrackerConfiguration(encode_base64=True) diff --git a/snowplow_tracker/celery/celery_emitter.py b/snowplow_tracker/celery/celery_emitter.py index d9aafaa7..682ee24b 100644 --- a/snowplow_tracker/celery/celery_emitter.py +++ b/snowplow_tracker/celery/celery_emitter.py @@ -53,9 +53,9 @@ def __init__( protocol: HttpProtocol = "http", port: Optional[int] = None, method: Method = "post", - buffer_size: Optional[int] = None, + batch_size: Optional[int] = None, byte_limit: Optional[int] = None) -> None: - super(CeleryEmitter, self).__init__(endpoint, protocol, port, method, buffer_size, None, None, byte_limit) + super(CeleryEmitter, self).__init__(endpoint, protocol, port, method, 
batch_size, None, None, byte_limit) try: # Check whether a custom Celery configuration module named "snowplow_celery_config" exists diff --git a/snowplow_tracker/emitter_configuration.py b/snowplow_tracker/emitter_configuration.py index 6d4ef0e6..c38823a1 100644 --- a/snowplow_tracker/emitter_configuration.py +++ b/snowplow_tracker/emitter_configuration.py @@ -26,16 +26,17 @@ class EmitterConfiguration(object): def __init__( self, - buffer_size: Optional[int] = None, + batch_size: Optional[int] = None, on_success: Optional[SuccessCallback] = None, on_failure: Optional[FailureCallback] = None, byte_limit: Optional[int] = None, request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + buffer_capacity: Optional[int] = None ) -> None: """ Configuration for the emitter that sends events to the Snowplow collector. - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. - :type buffer_size: int | None + :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :type batch_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 Gets passed the number of events flushed. :type on_success: function | None @@ -53,26 +54,27 @@ def __init__( :type request_timeout: float | tuple | None """ - self.buffer_size = buffer_size + self.batch_size = batch_size self.on_success = on_success self.on_failure = on_failure self.byte_limit = byte_limit self.request_timeout = request_timeout + self.buffer_capacity = buffer_capacity @property - def buffer_size(self) -> Optional[int]: + def batch_size(self) -> Optional[int]: """ The maximum number of queued events before the buffer is flushed. Default is 10. """ - return self._buffer_size + return self._batch_size - @buffer_size.setter - def buffer_size(self, value: Optional[int]): + @batch_size.setter + def batch_size(self, value: Optional[int]): if isinstance(value, int) and value < 0: - raise ValueError("buffer_size must greater than 0") + raise ValueError("batch_size must greater than 0") if not isinstance(value, int) and value is not None: - raise ValueError("buffer_size must be of type int") - self._buffer_size = value + raise ValueError("batch_size must be of type int") + self._batch_size = value @property def on_success(self) -> Optional[SuccessCallback]: @@ -127,3 +129,19 @@ def request_timeout(self) -> Optional[Union[float, Tuple[float, float]]]: @request_timeout.setter def request_timeout(self, value: Optional[Union[float, Tuple[float, float]]]): self._request_timeout = value + + @property + def buffer_capacity(self) -> Optional[int]: + """ + The maximum capacity of the event buffer. The default buffer capacity is 10 000 events. + When the buffer is full new events are lost. 
+ """ + return self._buffer_capacity + + @buffer_capacity.setter + def buffer_capacity(self, value: Optional[int]): + if isinstance(value, int) and value < 0: + raise ValueError("buffer_capacity must greater than 0") + if not isinstance(value, int) and value is not None: + raise ValueError("buffer_capacity must be of type int") + self._buffer_capacity = value \ No newline at end of file diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 3d71b767..20acda8e 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -24,6 +24,7 @@ import time import threading import requests +import random from typing import Optional, Union, Tuple from queue import Queue @@ -63,11 +64,13 @@ def __init__( protocol: HttpProtocol = "https", port: Optional[int] = None, method: Method = "post", - buffer_size: Optional[int] = None, + batch_size: Optional[int] = None, on_success: Optional[SuccessCallback] = None, on_failure: Optional[FailureCallback] = None, byte_limit: Optional[int] = None, request_timeout: Optional[Union[float, Tuple[float, float]]] = None, + max_retry_delay_seconds: int = 60, + buffer_capacity: int = 10000, ) -> None: """ :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. @@ -78,8 +81,8 @@ def __init__( :type port: int | None :param method: The HTTP request method. Defaults to post. :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. - :type buffer_size: int | None + :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :type batch_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 Gets passed the number of events flushed. :type on_success: function | None @@ -95,6 +98,11 @@ def __init__( applies to both "connect" AND "read" timeout, or as tuple with two float values which specify the "connect" and "read" timeouts separately :type request_timeout: float | tuple | None + :param max_retry_delay_seconds: Set the maximum time between attempts to send failed events to the collector. Default 60 seconds + :type max_retry_delay_seconds: int + :param buffer_capacity: The maximum capacity of the event buffer. The default buffer capacity is 10 000 events. + When the buffer is full new events are lost. 
+ :type buffer_capacity: int """ one_of(protocol, PROTOCOLS) one_of(method, METHODS) @@ -103,12 +111,15 @@ def __init__( self.method = method - if buffer_size is None: + if batch_size is None: if method == "post": - buffer_size = DEFAULT_MAX_LENGTH + batch_size = DEFAULT_MAX_LENGTH else: - buffer_size = 1 - self.buffer_size = buffer_size + batch_size = 1 + + if batch_size > buffer_capacity: + batch_size = buffer_capacity + self.batch_size = batch_size self.buffer = [] self.byte_limit = byte_limit self.bytes_queued = None if byte_limit is None else 0 @@ -119,8 +130,13 @@ def __init__( self.lock = threading.RLock() - self.timer = None + self.timer = FlushTimer(emitter=self, repeating=True) + self.retry_timer = FlushTimer(emitter=self, repeating=False) + self.max_retry_delay_seconds = max_retry_delay_seconds + self.retry_delay = 0 + + self.buffer_capacity = buffer_capacity logger.info("Emitter initialized with endpoint " + self.endpoint) @staticmethod @@ -187,30 +203,33 @@ def reached_limit(self) -> bool: :rtype: bool """ if self.byte_limit is None: - return len(self.buffer) >= self.buffer_size + return len(self.buffer) >= self.batch_size else: return (self.bytes_queued or 0) >= self.byte_limit or len( self.buffer - ) >= self.buffer_size + ) >= self.batch_size def flush(self) -> None: """ Sends all events in the buffer to the collector. """ with self.lock: - self.send_events(self.buffer) + if self.retry_timer.is_active(): + return + + send_events = self.buffer self.buffer = [] + self.send_events(send_events) if self.bytes_queued is not None: self.bytes_queued = 0 - def http_post(self, data: str) -> bool: + def http_post(self, data: str) -> int: """ :param data: The array of JSONs to be sent :type data: string """ logger.info("Sending POST request to %s..." % self.endpoint) logger.debug("Payload: %s" % data) - post_succeeded = False try: r = requests.post( self.endpoint, @@ -218,35 +237,28 @@ def http_post(self, data: str) -> bool: headers={"Content-Type": "application/json; charset=utf-8"}, timeout=self.request_timeout, ) - post_succeeded = Emitter.is_good_status_code(r.status_code) - getattr(logger, "info" if post_succeeded else "warning")( - "POST request finished with status code: " + str(r.status_code) - ) except requests.RequestException as e: logger.warning(e) + return -1 - return post_succeeded + return r.status_code - def http_get(self, payload: PayloadDict) -> bool: + def http_get(self, payload: PayloadDict) -> int: """ :param payload: The event properties :type payload: dict(string:\\*) """ logger.info("Sending GET request to %s..." % self.endpoint) logger.debug("Payload: %s" % payload) - get_succeeded = False try: r = requests.get( self.endpoint, params=payload, timeout=self.request_timeout ) - get_succeeded = Emitter.is_good_status_code(r.status_code) - getattr(logger, "info" if get_succeeded else "warning")( - "GET request finished with status code: " + str(r.status_code) - ) except requests.RequestException as e: logger.warning(e) + return -1 - return get_succeeded + return r.status_code def sync_flush(self) -> None: """ @@ -254,7 +266,7 @@ def sync_flush(self) -> None: This is guaranteed to be blocking, not asynchronous. 
""" logger.debug("Starting synchronous flush...") - Emitter.flush(self) + self.flush() logger.info("Finished synchronous flush") @staticmethod @@ -264,7 +276,7 @@ def is_good_status_code(status_code: int) -> bool: :type status_code: int :rtype: bool """ - return 200 <= status_code < 400 + return 200 <= status_code < 300 def send_events(self, evts: PayloadDictList) -> None: """ @@ -280,7 +292,8 @@ def send_events(self, evts: PayloadDictList) -> None: if self.method == "post": data = SelfDescribingJson(PAYLOAD_DATA_SCHEMA, evts).to_string() - request_succeeded = self.http_post(data) + status_code = self.http_post(data) + request_succeeded = Emitter.is_good_status_code(status_code) if request_succeeded: success_events += evts else: @@ -288,7 +301,9 @@ def send_events(self, evts: PayloadDictList) -> None: elif self.method == "get": for evt in evts: - request_succeeded = self.http_get(evt) + status_code = self.http_get(evt) + request_succeeded = Emitter.is_good_status_code(status_code) + if request_succeeded: success_events += [evt] else: @@ -299,33 +314,36 @@ def send_events(self, evts: PayloadDictList) -> None: if self.on_failure is not None and len(failure_events) > 0: self.on_failure(len(success_events), failure_events) + if self._should_retry(status_code): + self._set_retry_delay() + self._retry_failed_events(failure_events) + else: + self._reset_retry_delay() else: logger.info("Skipping flush since buffer is empty") - def set_flush_timer(self, timeout: float, flush_now: bool = False) -> None: + def _set_retry_timer(self, timeout: float) -> None: """ - Set an interval at which the buffer will be flushed + Set an interval at which failed events will be retried :param timeout: interval in seconds :type timeout: int | float - :param flush_now: immediately flush buffer - :type flush_now: bool """ + self.retry_timer.start(timeout=timeout) - # Repeatable create new timer - if flush_now: - self.flush() - self.timer = threading.Timer(timeout, self.set_flush_timer, [timeout, True]) - self.timer.daemon = True - self.timer.start() + def set_flush_timer(self, timeout: float) -> None: + """ + Set an interval at which the buffer will be flushed + :param timeout: interval in seconds + :type timeout: int | float + """ + self.timer.start(timeout=timeout) def cancel_flush_timer(self) -> None: """ Abort automatic async flushing """ - - if self.timer is not None: - self.timer.cancel() + self.timer.cancel() @staticmethod def attach_sent_timestamp(events: PayloadDictList) -> None: @@ -344,6 +362,59 @@ def update(e: PayloadDict) -> None: for event in events: update(event) + def _should_retry(self, status_code: int) -> bool: + """ + Checks if a request should be retried + + :param status_code: Response status code + :type status_code: int + :rtype: bool + """ + if Emitter.is_good_status_code(status_code): + return False + + return status_code not in [400, 401, 403, 410, 422] + + def _set_retry_delay(self) -> None: + """ + Sets a delay to retry failed events + """ + random_noise = random.random() + self.retry_delay = min(self.retry_delay * 2 + random_noise, self.max_retry_delay_seconds) + + def _reset_retry_delay(self) -> None: + """ + Resets retry delay to 0 + """ + self.retry_delay = 0 + + def _retry_failed_events(self, failed_events) -> None: + """ + Adds failed events back to the buffer to retry + + :param failed_events: List of failed events + :type List + """ + for event in failed_events: + if not event in self.buffer and not self._buffer_capacity_reached(): + self.buffer.append(event) + + 
self._set_retry_timer(self.retry_delay) + + def _buffer_capacity_reached(self) -> bool: + """ + Returns true if buffer capacity is reached + + :rtype: bool + """ + return len(self.buffer) >= self.buffer_capacity + + def _cancel_retry_timer(self) -> None: + """ + Cancels a retry timer + """ + self.retry_timer.cancel() + class AsyncEmitter(Emitter): """ @@ -356,14 +427,16 @@ def __init__( protocol: HttpProtocol = "http", port: Optional[int] = None, method: Method = "post", - buffer_size: Optional[int] = None, + batch_size: Optional[int] = None, on_success: Optional[SuccessCallback] = None, on_failure: Optional[FailureCallback] = None, thread_count: int = 1, byte_limit: Optional[int] = None, + max_retry_delay_seconds: int = 60, + buffer_capacity: int = 10000, ) -> None: """ - :param endpoint: The collector URL. Don't include "http://" - this is done automatically. + :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. :type endpoint: string :param protocol: The protocol to use - http or https. Defaults to http. :type protocol: protocol @@ -371,8 +444,8 @@ def __init__( :type port: int | None :param method: The HTTP request method :type method: method - :param buffer_size: The maximum number of queued events before the buffer is flushed. Default is 10. - :type buffer_size: int | None + :param batch_size: The maximum number of queued events before the buffer is flushed. Default is 10. + :type batch_size: int | None :param on_success: Callback executed after every HTTP request in a flush has status code 200 Gets passed the number of events flushed. :type on_success: function | None @@ -386,16 +459,23 @@ def __init__( :type thread_count: int :param byte_limit: The size event list after reaching which queued events will be flushed :type byte_limit: int | None + :param max_retry_delay_seconds: Set the maximum time between attempts to send failed events to the collector. Default 60 seconds + :type max_retry_delay_seconds: int + :param buffer_capacity: The maximum capacity of the event buffer. The default buffer capacity is 10,000 events. + When the buffer is full new events are lost. + :type buffer_capacity: int """ super(AsyncEmitter, self).__init__( endpoint, protocol, port, method, - buffer_size, + batch_size, on_success, on_failure, byte_limit, + max_retry_delay_seconds, + buffer_capacity ) self.queue = Queue() for i in range(thread_count): @@ -426,3 +506,47 @@ def consume(self) -> None: evts = self.queue.get() self.send_events(evts) self.queue.task_done() + + +class FlushTimer(object): + """ + Internal class used by the Emitter to schedule flush calls for later. 
+ """ + + def __init__(self, emitter: Emitter, repeating: bool): + self.emitter = emitter + self.repeating = repeating + self.timer: Optional[threading.Timer] = None + self.lock = threading.RLock() + + def start(self, timeout: float) -> bool: + with self.lock: + if self.timer is not None: + return False + else: + self._schedule_timer(timeout=timeout) + return True + + def cancel(self) -> None: + with self.lock: + if self.timer is not None: + self.timer.cancel() + self.timer = None + + def is_active(self) -> bool: + with self.lock: + return self.timer is not None + + def _fire(self, timeout: float) -> None: + with self.lock: + if self.repeating: + self._schedule_timer(timeout) + else: + self.timer = None + + self.emitter.flush() + + def _schedule_timer(self, timeout: float) -> None: + self.timer = threading.Timer(timeout, self._fire, [timeout]) + self.timer.daemon = True + self.timer.start() diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py index 7b08c071..8dd31645 100644 --- a/snowplow_tracker/snowplow.py +++ b/snowplow_tracker/snowplow.py @@ -77,7 +77,7 @@ def create_tracker( emitter = Emitter( endpoint, method=method, - buffer_size=emitter_config.buffer_size, + batch_size=emitter_config.batch_size, on_success=emitter_config.on_success, on_failure=emitter_config.on_failure, byte_limit=emitter_config.byte_limit, diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index c487bb70..05e2ee96 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -36,7 +36,7 @@ querystrings = [""] -default_emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=1) +default_emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=1) get_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='get') @@ -333,7 +333,7 @@ def test_post_page_view(self) -> None: self.assertEqual(request["data"][0][key], expected_fields[key]) def test_post_batched(self) -> None: - default_emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=2) + default_emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=2) t = tracker.Tracker(default_emitter, default_subject) with HTTMock(pass_post_response_content): t.track_struct_event("Test", "A") @@ -343,7 +343,7 @@ def test_post_batched(self) -> None: @freeze_time("2021-04-19 00:00:01") # unix: 1618790401000 def test_timestamps(self) -> None: - emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=3) + emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=3) t = tracker.Tracker([emitter], default_subject) with HTTMock(pass_post_response_content): t.track_page_view("localhost", "stamp0", None, tstamp=None) @@ -363,7 +363,7 @@ def test_timestamps(self) -> None: self.assertEqual(request["data"][i].get("stm"), expected_timestamps[i]["stm"]) def test_bytelimit(self) -> None: - default_emitter = emitters.Emitter("localhost", protocol="http", port=80, buffer_size=5, byte_limit=420) + default_emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=5, byte_limit=420) t = tracker.Tracker(default_emitter, default_subject) with HTTMock(pass_post_response_content): t.track_struct_event("Test", "A") # 140 bytes diff --git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py index 51b107d0..c8790915 100644 --- 
a/snowplow_tracker/test/unit/test_emitters.py +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -46,6 +46,14 @@ def mocked_http_success(*args: Any) -> bool: def mocked_http_failure(*args: Any) -> bool: return False +def mocked_http_response_success(*args: Any) -> int: + return 200 + +def mocked_http_response_failure(*args: Any) -> int: + return 400 + +def mocked_http_response_failure_retry(*args: Any) -> int: + return 500 class TestEmitters(unittest.TestCase): @@ -56,22 +64,22 @@ def test_init(self) -> None: e = Emitter('0.0.0.0') self.assertEqual(e.endpoint, 'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') self.assertEqual(e.method, 'post') - self.assertEqual(e.buffer_size, 10) + self.assertEqual(e.batch_size, 10) self.assertEqual(e.buffer, []) self.assertIsNone(e.byte_limit) self.assertIsNone(e.bytes_queued) self.assertIsNone(e.on_success) self.assertIsNone(e.on_failure) - self.assertIsNone(e.timer) + self.assertFalse(e.timer.is_active()) self.assertIsNone(e.request_timeout) - def test_init_buffer_size(self) -> None: - e = Emitter('0.0.0.0', buffer_size=10) - self.assertEqual(e.buffer_size, 10) + def test_init_batch_size(self) -> None: + e = Emitter('0.0.0.0', batch_size=10) + self.assertEqual(e.batch_size, 10) def test_init_post(self) -> None: e = Emitter('0.0.0.0') - self.assertEqual(e.buffer_size, DEFAULT_MAX_LENGTH) + self.assertEqual(e.batch_size, DEFAULT_MAX_LENGTH) def test_init_byte_limit(self) -> None: e = Emitter('0.0.0.0', byte_limit=512) @@ -113,7 +121,7 @@ def test_as_collector_uri_endpoint_protocol_http(self) -> None: def test_input_no_flush(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", buffer_size=2) + e = Emitter('0.0.0.0', method="get", batch_size=2) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) @@ -127,7 +135,7 @@ def test_input_no_flush(self, mok_flush: Any) -> None: def test_input_flush_byte_limit(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", buffer_size=2, byte_limit=16) + e = Emitter('0.0.0.0', method="get", batch_size=2, byte_limit=16) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) @@ -140,7 +148,7 @@ def test_input_flush_byte_limit(self, mok_flush: Any) -> None: def test_input_flush_buffer(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", buffer_size=2, byte_limit=1024) + e = Emitter('0.0.0.0', method="get", batch_size=2, byte_limit=1024) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) @@ -159,7 +167,7 @@ def test_input_flush_buffer(self, mok_flush: Any) -> None: def test_input_bytes_queued(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", buffer_size=2, byte_limit=1024) + e = Emitter('0.0.0.0', method="get", batch_size=2, byte_limit=1024) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) @@ -183,7 +191,7 @@ def test_input_bytes_post(self, mok_flush: Any) -> None: def test_flush(self, mok_send_events: Any) -> None: mok_send_events.side_effect = mocked_send_events - e = Emitter('0.0.0.0', buffer_size=2, byte_limit=None) + e = Emitter('0.0.0.0', batch_size=2, byte_limit=None) nvPairs = {"n": "v"} e.input(nvPairs) e.input(nvPairs) @@ -195,7 +203,7 @@ def test_flush(self, mok_send_events: Any) -> None: def test_flush_bytes_queued(self, mok_send_events: Any) -> None: mok_send_events.side_effect = mocked_send_events - e = Emitter('0.0.0.0', buffer_size=2, byte_limit=256) + e = Emitter('0.0.0.0', batch_size=2, 
byte_limit=256) nvPairs = {"n": "v"} e.input(nvPairs) e.input(nvPairs) @@ -219,7 +227,7 @@ def test_attach_sent_tstamp(self) -> None: def test_flush_timer(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', buffer_size=10) + e = Emitter('0.0.0.0', batch_size=10) ev_list = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] for i in ev_list: e.input(i) @@ -227,15 +235,15 @@ def test_flush_timer(self, mok_flush: Any) -> None: e.set_flush_timer(3) self.assertEqual(len(e.buffer), 3) time.sleep(5) - self.assertEqual(mok_flush.call_count, 1) + self.assertGreaterEqual(mok_flush.call_count, 1) @mock.patch('snowplow_tracker.Emitter.http_get') def test_send_events_get_success(self, mok_http_get: Any) -> None: - mok_http_get.side_effect = mocked_http_success + mok_http_get.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method="get", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter('0.0.0.0', method="get", batch_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) @@ -244,11 +252,11 @@ def test_send_events_get_success(self, mok_http_get: Any) -> None: @mock.patch('snowplow_tracker.Emitter.http_get') def test_send_events_get_failure(self, mok_http_get: Any) -> None: - mok_http_get.side_effect = mocked_http_failure + mok_http_get.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method="get", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter('0.0.0.0', method="get", batch_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) @@ -257,11 +265,11 @@ def test_send_events_get_failure(self, mok_http_get: Any) -> None: @mock.patch('snowplow_tracker.Emitter.http_post') def test_send_events_post_success(self, mok_http_post: Any) -> None: - mok_http_post.side_effect = mocked_http_success + mok_http_post.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) @@ -270,11 +278,11 @@ def test_send_events_post_success(self, mok_http_post: Any) -> None: @mock.patch('snowplow_tracker.Emitter.http_post') def test_send_events_post_failure(self, mok_http_post: Any) -> None: - mok_http_post.side_effect = mocked_http_failure + mok_http_post.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) @@ -285,7 +293,8 @@ def test_send_events_post_failure(self, mok_http_post: Any) -> None: def test_http_post_connect_timeout_error(self, mok_post_request: Any) -> None: mok_post_request.side_effect = ConnectTimeout e = Emitter('0.0.0.0') - post_succeeded = 
e.http_post("dummy_string") + response = e.http_post("dummy_string") + post_succeeded = Emitter.is_good_status_code(response) self.assertFalse(post_succeeded) @@ -293,8 +302,8 @@ def test_http_post_connect_timeout_error(self, mok_post_request: Any) -> None: def test_http_get_connect_timeout_error(self, mok_post_request: Any) -> None: mok_post_request.side_effect = ConnectTimeout e = Emitter('0.0.0.0', method='get') - get_succeeded = e.http_get({"a": "b"}) - + response = e.http_get({"a": "b"}) + get_succeeded = Emitter.is_good_status_code(response) self.assertFalse(get_succeeded) ### @@ -304,7 +313,7 @@ def test_http_get_connect_timeout_error(self, mok_post_request: Any) -> None: def test_async_emitter_input(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - ae = AsyncEmitter('0.0.0.0', port=9090, method="get", buffer_size=3, thread_count=5) + ae = AsyncEmitter('0.0.0.0', port=9090, method="get", batch_size=3, thread_count=5) self.assertTrue(ae.queue.empty()) ae.input({"a": "aa"}) @@ -320,7 +329,7 @@ def test_async_emitter_input(self, mok_flush: Any) -> None: def test_async_emitter_sync_flash(self, mok_send_events: Any) -> None: mok_send_events.side_effect = mocked_send_events - ae = AsyncEmitter('0.0.0.0', port=9090, method="get", buffer_size=3, thread_count=5, byte_limit=1024) + ae = AsyncEmitter('0.0.0.0', port=9090, method="get", batch_size=3, thread_count=5, byte_limit=1024) self.assertTrue(ae.queue.empty()) ae.input({"a": "aa"}) @@ -336,11 +345,11 @@ def test_async_emitter_sync_flash(self, mok_send_events: Any) -> None: @mock.patch('snowplow_tracker.Emitter.http_get') def test_async_send_events_get_success(self, mok_http_get: Any) -> None: - mok_http_get.side_effect = mocked_http_success + mok_http_get.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = AsyncEmitter('0.0.0.0', method="get", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = AsyncEmitter('0.0.0.0', method="get", batch_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) @@ -349,11 +358,11 @@ def test_async_send_events_get_success(self, mok_http_get: Any) -> None: @mock.patch('snowplow_tracker.Emitter.http_get') def test_async_send_events_get_failure(self, mok_http_get: Any) -> None: - mok_http_get.side_effect = mocked_http_failure + mok_http_get.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = AsyncEmitter('0.0.0.0', method="get", buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = AsyncEmitter('0.0.0.0', method="get", batch_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) @@ -362,11 +371,11 @@ def test_async_send_events_get_failure(self, mok_http_get: Any) -> None: @mock.patch('snowplow_tracker.Emitter.http_post') def test_async_send_events_post_success(self, mok_http_post: Any) -> None: - mok_http_post.side_effect = mocked_http_success + mok_http_post.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) 
evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) @@ -375,11 +384,11 @@ def test_async_send_events_post_success(self, mok_http_post: Any) -> None: @mock.patch('snowplow_tracker.Emitter.http_post') def test_async_send_events_post_failure(self, mok_http_post: Any) -> None: - mok_http_post.side_effect = mocked_http_failure + mok_http_post.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = Emitter('0.0.0.0', buffer_size=10, on_success=mok_success, on_failure=mok_failure) + ae = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) @@ -392,7 +401,7 @@ def test_input_unicode_get(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush payload = {"unicode": u'\u0107', "alsoAscii": "abc"} - ae = AsyncEmitter('0.0.0.0', method="get", buffer_size=2) + ae = AsyncEmitter('0.0.0.0', method="get", batch_size=2) ae.input(payload) self.assertEqual(len(ae.buffer), 1) @@ -403,8 +412,66 @@ def test_input_unicode_post(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush payload = {"unicode": u'\u0107', "alsoAscii": "abc"} - ae = AsyncEmitter('0.0.0.0', buffer_size=2) + ae = AsyncEmitter('0.0.0.0', batch_size=2) ae.input(payload) self.assertEqual(len(ae.buffer), 1) self.assertDictEqual(payload, ae.buffer[0]) + + @mock.patch('snowplow_tracker.Emitter.http_post') + def test_send_events_post_retry(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure_retry + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_post.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + + @mock.patch('snowplow_tracker.Emitter.http_get') + def test_send_events_get_retry(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure_retry + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter('0.0.0.0', method='get', batch_size=1, on_success=mok_success, on_failure=mok_failure) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_get.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + + @mock.patch('snowplow_tracker.Emitter.http_get') + def test_send_events_get_no_retry(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter('0.0.0.0', method='get', batch_size=1, on_success=mok_success, on_failure=mok_failure) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_failure.assert_called_once_with(0, evBuffer) + mok_success.assert_not_called() + + @mock.patch('snowplow_tracker.Emitter.http_post') + def test_send_events_post_no_retry(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success 
mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter('0.0.0.0', method='get', batch_size=1, on_success=mok_success, on_failure=mok_failure) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_failure.assert_called_once_with(0, evBuffer) + mok_success.assert_not_called() From 3335069198b5eaf7f8df451884f187887a29e848 Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Tue, 10 Jan 2023 10:31:11 +0000 Subject: [PATCH 06/11] Add customizable no-retry HTTP status codes (close #297) PR #307 * Add custom retry codes to emitter class * Add custom retry codes to Snowplow class * Add custom retry codes to emitter configuration * Add custom retry unit tests --- examples/snowplow_app.py | 3 +- snowplow_tracker/emitter_configuration.py | 47 +++++++++++++++++++-- snowplow_tracker/emitters.py | 11 ++++- snowplow_tracker/snowplow.py | 1 + snowplow_tracker/test/unit/test_emitters.py | 33 +++++++++++++++ 5 files changed, 90 insertions(+), 5 deletions(-) diff --git a/examples/snowplow_app.py b/examples/snowplow_app.py index 7ce2c1d8..9cc9f093 100644 --- a/examples/snowplow_app.py +++ b/examples/snowplow_app.py @@ -18,7 +18,8 @@ def main(): collector_url = get_url_from_args() # Configure Emitter - emitter_config = EmitterConfiguration(batch_size=5) + custom_retry_codes = {500: False, 401: True} + emitter_config = EmitterConfiguration(batch_size=5, custom_retry_codes=custom_retry_codes) # Configure Tracker tracker_config = TrackerConfiguration(encode_base64=True) diff --git a/snowplow_tracker/emitter_configuration.py b/snowplow_tracker/emitter_configuration.py index c38823a1..77f80bc5 100644 --- a/snowplow_tracker/emitter_configuration.py +++ b/snowplow_tracker/emitter_configuration.py @@ -19,7 +19,7 @@ # License: Apache License Version 2.0 # """ -from typing import Optional, Union, Tuple +from typing import Optional, Union, Tuple, Dict from snowplow_tracker.typing import SuccessCallback, FailureCallback @@ -31,7 +31,8 @@ def __init__( on_failure: Optional[FailureCallback] = None, byte_limit: Optional[int] = None, request_timeout: Optional[Union[float, Tuple[float, float]]] = None, - buffer_capacity: Optional[int] = None + buffer_capacity: Optional[int] = None, + custom_retry_codes: Dict[int, bool] = {} ) -> None: """ Configuration for the emitter that sends events to the Snowplow collector. @@ -52,6 +53,10 @@ def __init__( applies to both "connect" AND "read" timeout, or as tuple with two float values which specify the "connect" and "read" timeouts separately :type request_timeout: float | tuple | None + :param custom_retry_codes: Set custom retry rules for HTTP status codes received in emit responses from the Collector. + By default, retry will not occur for status codes 400, 401, 403, 410 or 422. This can be overridden here. + Note that 2xx codes will never retry as they are considered successful. 
+ :type custom_retry_codes: dict """ self.batch_size = batch_size @@ -60,6 +65,7 @@ def __init__( self.byte_limit = byte_limit self.request_timeout = request_timeout self.buffer_capacity = buffer_capacity + self.custom_retry_codes = custom_retry_codes @property def batch_size(self) -> Optional[int]: @@ -144,4 +150,39 @@ def buffer_capacity(self, value: Optional[int]): raise ValueError("buffer_capacity must greater than 0") if not isinstance(value, int) and value is not None: raise ValueError("buffer_capacity must be of type int") - self._buffer_capacity = value \ No newline at end of file + self._buffer_capacity = value + + @property + def custom_retry_codes(self) -> Dict[int, bool]: + """ + Custom retry rules for HTTP status codes received in emit responses from the Collector. + """ + return self._custom_retry_codes + + @custom_retry_codes.setter + def custom_retry_codes(self, value: Dict[int, bool]): + self._custom_retry_codes = value + def set_retry_code(self, status_code: int, retry = True) -> bool: + """ + Add a retry rule for HTTP status code received from emit responses from the Collector. + :param status_code: HTTP response code + :type status_code: int + :param retry: Set the status_code to retry (True) or not retry (False). Default is True + :type retry: bool + """ + if not isinstance(status_code, int): + print("status_code must be of type int") + return False + + if not isinstance(retry, bool): + print("retry must be of type bool") + return False + + if 200 <= status_code < 300: + print("custom_retry_codes should not include codes for succesful requests (2XX codes)") + return False + + self.custom_retry_codes[status_code] = retry + + return status_code in self.custom_retry_codes.keys() + diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 20acda8e..3beaaecf 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -25,7 +25,7 @@ import threading import requests import random -from typing import Optional, Union, Tuple +from typing import Optional, Union, Tuple, Dict from queue import Queue from snowplow_tracker.self_describing_json import SelfDescribingJson @@ -71,6 +71,7 @@ def __init__( request_timeout: Optional[Union[float, Tuple[float, float]]] = None, max_retry_delay_seconds: int = 60, buffer_capacity: int = 10000, + custom_retry_codes: Dict[int, bool] = {} ) -> None: """ :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. @@ -103,6 +104,10 @@ def __init__( :param buffer_capacity: The maximum capacity of the event buffer. The default buffer capacity is 10 000 events. When the buffer is full new events are lost. :type buffer_capacity: int + :param custom_retry_codes: Set custom retry rules for HTTP status codes received in emit responses from the Collector. + By default, retry will not occur for status codes 400, 401, 403, 410 or 422. This can be overridden here. + Note that 2xx codes will never retry as they are considered successful. 
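The decision this parameter feeds into can be sketched as follows; this is a standalone illustration of the documented behaviour rather than the emitter's own method:

    from typing import Dict

    NO_RETRY_DEFAULTS = {400, 401, 403, 410, 422}

    def should_retry(status_code: int, custom_retry_codes: Dict[int, bool]) -> bool:
        # 2xx responses are successful and are never retried.
        if 200 <= status_code < 300:
            return False
        # An explicit rule supplied by the user takes precedence next.
        if status_code in custom_retry_codes:
            return custom_retry_codes[status_code]
        # Otherwise fall back to the default no-retry list.
        return status_code not in NO_RETRY_DEFAULTS

    assert should_retry(503, {}) is True
    assert should_retry(403, {}) is False
    assert should_retry(403, {403: True}) is True
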
+ :type custom_retry_codes: dict """ one_of(protocol, PROTOCOLS) one_of(method, METHODS) @@ -137,6 +142,7 @@ def __init__( self.retry_delay = 0 self.buffer_capacity = buffer_capacity + self.custom_retry_codes = custom_retry_codes logger.info("Emitter initialized with endpoint " + self.endpoint) @staticmethod @@ -373,6 +379,9 @@ def _should_retry(self, status_code: int) -> bool: if Emitter.is_good_status_code(status_code): return False + if status_code in self.custom_retry_codes.keys(): + return self.custom_retry_codes[status_code] + return status_code not in [400, 401, 403, 410, 422] def _set_retry_delay(self) -> None: diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py index 8dd31645..946802e7 100644 --- a/snowplow_tracker/snowplow.py +++ b/snowplow_tracker/snowplow.py @@ -82,6 +82,7 @@ def create_tracker( on_failure=emitter_config.on_failure, byte_limit=emitter_config.byte_limit, request_timeout=emitter_config.request_timeout, + custom_retry_codes=emitter_config.custom_retry_codes ) tracker = Tracker( diff --git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py index c8790915..439b66a8 100644 --- a/snowplow_tracker/test/unit/test_emitters.py +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -475,3 +475,36 @@ def test_send_events_post_no_retry(self, mok_http_post: Any) -> None: mok_failure.assert_called_once_with(0, evBuffer) mok_success.assert_not_called() + + @mock.patch('snowplow_tracker.Emitter.http_post') + def test_send_events_post_custom_retry(self, mok_http_post: Any) -> None: + mok_http_post.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure, custom_retry_codes={400: True}) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_post.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + + @mock.patch('snowplow_tracker.Emitter.http_get') + def test_send_events_get_custom_retry(self, mok_http_get: Any) -> None: + mok_http_get.side_effect = mocked_http_response_failure + mok_success = mock.Mock(return_value="success mocked") + mok_failure = mock.Mock(return_value="failure mocked") + + e = Emitter('0.0.0.0', method='get',batch_size=10, on_success=mok_success, on_failure=mok_failure, custom_retry_codes={400: True}) + evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] + e.send_events(evBuffer) + + mok_http_get.side_effect = mocked_http_response_success + time.sleep(5) + + mok_failure.assert_called_with(0, evBuffer) + mok_success.assert_called_with(evBuffer) + From dccef831a493641ea55c6e244050569e50d9693c Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Thu, 12 Jan 2023 09:08:38 +0000 Subject: [PATCH 07/11] Rename unstruct_event to self_describing_event (close #298) PR #308 * Update unstruct_event to self_describing_event * Add deprecation warning to track_unstruct_event --- .../test/integration/test_integration.py | 8 ++-- snowplow_tracker/test/unit/test_tracker.py | 46 +++++++++---------- snowplow_tracker/tracker.py | 40 ++++++++++++---- 3 files changed, 58 insertions(+), 36 deletions(-) diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index 05e2ee96..b9ec214d 100644 --- 
a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -153,10 +153,10 @@ def test_integration_struct_event(self) -> None: for key in expected_fields: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) - def test_integration_unstruct_event_non_base64(self) -> None: + def test_integration_self_describing_event_non_base64(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) with HTTMock(pass_response_content): - t.track_unstruct_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) + t.track_self_describing_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) expected_fields = {"e": "ue"} for key in expected_fields: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) @@ -167,10 +167,10 @@ def test_integration_unstruct_event_non_base64(self) -> None: "data": {"schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", "data": {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}} }) - def test_integration_unstruct_event_base64(self) -> None: + def test_integration_self_describing_event_base64(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=True) with HTTMock(pass_response_content): - t.track_unstruct_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) + t.track_self_describing_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) expected_fields = {"e": "ue"} for key in expected_fields: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) diff --git a/snowplow_tracker/test/unit/test_tracker.py b/snowplow_tracker/test/unit/test_tracker.py index 34964e0a..5f361ae8 100644 --- a/snowplow_tracker/test/unit/test_tracker.py +++ b/snowplow_tracker/test/unit/test_tracker.py @@ -159,7 +159,7 @@ def test_get_timestamp_3(self) -> None: self.assertEqual(tstamp, 1000) # 1970-01-01 00:00:01 in ms @mock.patch("snowplow_tracker.Tracker.track") - def test_alias_of_track_unstruct_event(self, mok_track: Any) -> None: + def test_alias_of_track_self_describing_event(self, mok_track: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -441,7 +441,7 @@ def test_complete_payload_event_subject( ### @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_unstruct_event(self, mok_complete_payload: Any) -> None: + def test_track_self_describing_event(self, mok_complete_payload: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -449,7 +449,7 @@ def test_track_unstruct_event(self, mok_complete_payload: Any) -> None: t = Tracker(e, encode_base64=False) evJson = SelfDescribingJson("test.sde.schema", {"n": "v"}) - t.track_unstruct_event(evJson) + t.track_self_describing_event(evJson) self.assertEqual(mok_complete_payload.call_count, 1) completeArgsList = mok_complete_payload.call_args_list[0][0] self.assertEqual(len(completeArgsList), 4) @@ -474,7 +474,7 @@ def test_track_unstruct_event(self, mok_complete_payload: Any) -> None: self.assertTrue(actualTstampArg is None) @mock.patch("snowplow_tracker.Tracker.complete_payload") - def 
test_track_unstruct_event_all_args(self, mok_complete_payload: Any) -> None: + def test_track_self_describing_event_all_args(self, mok_complete_payload: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -485,7 +485,7 @@ def test_track_unstruct_event_all_args(self, mok_complete_payload: Any) -> None: ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) evContext = [ctx] evTstamp = 1399021242030 - t.track_unstruct_event(evJson, evContext, evTstamp) + t.track_self_describing_event(evJson, evContext, evTstamp) self.assertEqual(mok_complete_payload.call_count, 1) completeArgsList = mok_complete_payload.call_args_list[0][0] self.assertEqual(len(completeArgsList), 4) @@ -510,7 +510,7 @@ def test_track_unstruct_event_all_args(self, mok_complete_payload: Any) -> None: self.assertEqual(actualTstampArg, evTstamp) @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_unstruct_event_encode(self, mok_complete_payload: Any) -> None: + def test_track_self_describing_event_encode(self, mok_complete_payload: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -518,7 +518,7 @@ def test_track_unstruct_event_encode(self, mok_complete_payload: Any) -> None: t = Tracker(e, encode_base64=True) evJson = SelfDescribingJson("test.sde.schema", {"n": "v"}) - t.track_unstruct_event(evJson) + t.track_self_describing_event(evJson) self.assertEqual(mok_complete_payload.call_count, 1) completeArgsList = mok_complete_payload.call_args_list[0][0] self.assertEqual(len(completeArgsList), 4) @@ -829,7 +829,7 @@ def test_track_ecommerce_transaction_with_items( } self.assertDictEqual(secItemCallKwargs, expectedSecItemPairs) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_link_click(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -867,7 +867,7 @@ def test_track_link_click(self, mok_track_unstruct: Any) -> None: self.assertIs(callArgs[1][0], ctx) self.assertEqual(callArgs[2], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_link_click_optional_none(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -891,7 +891,7 @@ def test_track_link_click_optional_none(self, mok_track_unstruct: Any) -> None: self.assertTrue(callArgs[1] is None) self.assertTrue(callArgs[2] is None) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_add_to_cart(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -931,7 +931,7 @@ def test_track_add_to_cart(self, mok_track_unstruct: Any) -> None: self.assertIs(callArgs[1][0], ctx) self.assertEqual(callArgs[2], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_add_to_cart_optional_none(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -953,7 +953,7 @@ def test_track_add_to_cart_optional_none(self, mok_track_unstruct: Any) -> None: self.assertTrue(callArgs[1] is None) self.assertTrue(callArgs[2] is None) - 
@mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_remove_from_cart(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -993,7 +993,7 @@ def test_track_remove_from_cart(self, mok_track_unstruct: Any) -> None: self.assertIs(callArgs[1][0], ctx) self.assertEqual(callArgs[2], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_remove_from_cart_optional_none( self, mok_track_unstruct: Any ) -> None: @@ -1017,7 +1017,7 @@ def test_track_remove_from_cart_optional_none( self.assertTrue(callArgs[1] is None) self.assertTrue(callArgs[2] is None) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_change(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -1057,7 +1057,7 @@ def test_track_form_change(self, mok_track_unstruct: Any) -> None: self.assertIs(callArgs[1][0], ctx) self.assertEqual(callArgs[2], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_change_optional_none(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -1083,7 +1083,7 @@ def test_track_form_change_optional_none(self, mok_track_unstruct: Any) -> None: self.assertTrue(callArgs[1] is None) self.assertTrue(callArgs[2] is None) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -1125,7 +1125,7 @@ def test_track_form_submit(self, mok_track_unstruct: Any) -> None: self.assertIs(callArgs[1][0], ctx) self.assertEqual(callArgs[2], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_invalid_element_type( self, mok_track_unstruct: Any ) -> None: @@ -1155,7 +1155,7 @@ def test_track_form_submit_invalid_element_type( tstamp=evTstamp, ) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_invalid_element_type_disabled_contracts( self, mok_track_unstruct: Any ) -> None: @@ -1200,7 +1200,7 @@ def test_track_form_submit_invalid_element_type_disabled_contracts( self.assertIs(callArgs[1][0], ctx) self.assertEqual(callArgs[2], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_optional_none(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -1218,7 +1218,7 @@ def test_track_form_submit_optional_none(self, mok_track_unstruct: Any) -> None: self.assertTrue(callArgs[1] is None) self.assertTrue(callArgs[2] is None) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_form_submit_empty_elems(self, mok_track_unstruct: Any) -> 
None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -1234,7 +1234,7 @@ def test_track_form_submit_empty_elems(self, mok_track_unstruct: Any) -> None: self.assertEqual(len(callArgs), 4) self.assertDictEqual(callArgs[0].to_json(), expected) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_site_search(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -1265,7 +1265,7 @@ def test_track_site_search(self, mok_track_unstruct: Any) -> None: self.assertIs(callArgs[1][0], ctx) self.assertEqual(callArgs[2], evTstamp) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_site_search_optional_none(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -1286,7 +1286,7 @@ def test_track_site_search_optional_none(self, mok_track_unstruct: Any) -> None: self.assertTrue(callArgs[1] is None) self.assertTrue(callArgs[2] is None) - @mock.patch("snowplow_tracker.Tracker.track_unstruct_event") + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_screen_view(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index 4c988fc8..0215bf16 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -22,6 +22,7 @@ import time import uuid from typing import Any, Optional, Union, List, Dict, Sequence +from warnings import warn from snowplow_tracker import payload, _version, SelfDescribingJson from snowplow_tracker import subject as _subject @@ -310,7 +311,7 @@ def track_link_click( "%s/link_click/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event(event_json, context, tstamp, event_subject) def track_add_to_cart( self, @@ -363,7 +364,7 @@ def track_add_to_cart( "%s/add_to_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event(event_json, context, tstamp, event_subject) def track_remove_from_cart( self, @@ -416,7 +417,7 @@ def track_remove_from_cart( "%s/remove_from_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event(event_json, context, tstamp, event_subject) def track_form_change( self, @@ -470,7 +471,7 @@ def track_form_change( "%s/change_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event(event_json, context, tstamp, event_subject) def track_form_submit( self, @@ -511,7 +512,7 @@ def track_form_submit( "%s/submit_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event(event_json, context, tstamp, event_subject) def track_site_search( self, @@ -555,7 +556,7 @@ def track_site_search( "%s/site_search/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - 
return self.track_unstruct_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event(event_json, context, tstamp, event_subject) def track_ecommerce_transaction_item( self, @@ -717,7 +718,7 @@ def track_screen_view( screen_view_properties, ) - return self.track_unstruct_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event(event_json, context, tstamp, event_subject) def track_struct_event( self, @@ -764,7 +765,7 @@ def track_struct_event( return self.complete_payload(pb, context, tstamp, event_subject) - def track_unstruct_event( + def track_self_describing_event( self, event_json: SelfDescribingJson, context: Optional[List[SelfDescribingJson]] = None, @@ -797,7 +798,28 @@ def track_unstruct_event( return self.complete_payload(pb, context, tstamp, event_subject) # Alias - track_self_describing_event = track_unstruct_event + def track_unstruct_event( + self, + event_json: SelfDescribingJson, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param event_json: The properties of the event. Has two field: + A "data" field containing the event properties and + A "schema" field identifying the schema against which the data is validated + :type event_json: self_describing_json + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker + """ + warn('track_unstruct_event will be deprecated in future versions. Please use track_self_describing_event.', DeprecationWarning, stacklevel=2) + return self.track_self_describing_event(event_json, context, tstamp, event_subject) def flush(self, is_async: bool = False) -> "Tracker": """ From bbb276e87ab32e584a4115f2ed1ce61cbb33bcea Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Wed, 18 Jan 2023 13:48:02 +0000 Subject: [PATCH 08/11] EventStore with max limit (close #309) PR #310 * Define EventStore protocol * Add default InMemoryEventStore --- snowplow_tracker/__init__.py | 1 + snowplow_tracker/emitter_configuration.py | 35 +++-- snowplow_tracker/emitters.py | 110 +++++++------- snowplow_tracker/event_store.py | 142 ++++++++++++++++++ snowplow_tracker/snowplow.py | 2 +- snowplow_tracker/test/unit/test_emitters.py | 48 +++--- .../test/unit/test_in_memory_event_store.py | 110 ++++++++++++++ snowplow_tracker/tracker.py | 40 +++-- 8 files changed, 391 insertions(+), 97 deletions(-) create mode 100644 snowplow_tracker/event_store.py create mode 100644 snowplow_tracker/test/unit/test_in_memory_event_store.py diff --git a/snowplow_tracker/__init__.py b/snowplow_tracker/__init__.py index da683ada..8726efd9 100644 --- a/snowplow_tracker/__init__.py +++ b/snowplow_tracker/__init__.py @@ -7,6 +7,7 @@ from snowplow_tracker.tracker_configuration import TrackerConfiguration from snowplow_tracker.snowplow import Snowplow from snowplow_tracker.contracts import disable_contracts, enable_contracts +from snowplow_tracker.event_store import EventStore # celery extra from .celery import CeleryEmitter diff --git a/snowplow_tracker/emitter_configuration.py b/snowplow_tracker/emitter_configuration.py index 77f80bc5..9b636d02 100644 --- a/snowplow_tracker/emitter_configuration.py +++ b/snowplow_tracker/emitter_configuration.py @@ 
-21,6 +21,7 @@ from typing import Optional, Union, Tuple, Dict from snowplow_tracker.typing import SuccessCallback, FailureCallback +from snowplow_tracker.event_store import EventStore class EmitterConfiguration(object): @@ -32,7 +33,8 @@ def __init__( byte_limit: Optional[int] = None, request_timeout: Optional[Union[float, Tuple[float, float]]] = None, buffer_capacity: Optional[int] = None, - custom_retry_codes: Dict[int, bool] = {} + custom_retry_codes: Dict[int, bool] = {}, + event_store: Optional[EventStore] = None, ) -> None: """ Configuration for the emitter that sends events to the Snowplow collector. @@ -57,6 +59,8 @@ def __init__( By default, retry will not occur for status codes 400, 401, 403, 410 or 422. This can be overridden here. Note that 2xx codes will never retry as they are considered successful. :type custom_retry_codes: dict + :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. + :type event_store: EventStore | None """ self.batch_size = batch_size @@ -66,6 +70,7 @@ def __init__( self.request_timeout = request_timeout self.buffer_capacity = buffer_capacity self.custom_retry_codes = custom_retry_codes + self.event_store = event_store @property def batch_size(self) -> Optional[int]: @@ -155,34 +160,44 @@ def buffer_capacity(self, value: Optional[int]): @property def custom_retry_codes(self) -> Dict[int, bool]: """ - Custom retry rules for HTTP status codes received in emit responses from the Collector. + Custom retry rules for HTTP status codes received in emit responses from the Collector. """ return self._custom_retry_codes @custom_retry_codes.setter def custom_retry_codes(self, value: Dict[int, bool]): self._custom_retry_codes = value - def set_retry_code(self, status_code: int, retry = True) -> bool: + + def set_retry_code(self, status_code: int, retry=True) -> bool: """ - Add a retry rule for HTTP status code received from emit responses from the Collector. - :param status_code: HTTP response code - :type status_code: int - :param retry: Set the status_code to retry (True) or not retry (False). Default is True - :type retry: bool + Add a retry rule for HTTP status code received from emit responses from the Collector. + :param status_code: HTTP response code + :type status_code: int + :param retry: Set the status_code to retry (True) or not retry (False). 
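For example, a small sketch of adding rules after construction; the 418 code is an arbitrary choice:

    from snowplow_tracker import EmitterConfiguration

    emitter_config = EmitterConfiguration()

    # Ask the emitter to retry 418 responses as well; True means the rule was stored.
    assert emitter_config.set_retry_code(418, retry=True) is True

    # 2xx codes are rejected, since successful requests are never retried.
    assert emitter_config.set_retry_code(200, retry=True) is False
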
Default is True + :type retry: bool """ if not isinstance(status_code, int): print("status_code must be of type int") return False if not isinstance(retry, bool): - print("retry must be of type bool") + print("retry must be of type bool") return False if 200 <= status_code < 300: - print("custom_retry_codes should not include codes for succesful requests (2XX codes)") + print( + "custom_retry_codes should not include codes for succesful requests (2XX codes)" + ) return False self.custom_retry_codes[status_code] = retry return status_code in self.custom_retry_codes.keys() + @property + def event_store(self) -> Optional[EventStore]: + return self._event_store + + @event_store.setter + def event_store(self, value: Optional[EventStore]): + self._event_store = value diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index 3beaaecf..d6ac1ef2 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -38,6 +38,7 @@ FailureCallback, ) from snowplow_tracker.contracts import one_of +from snowplow_tracker.event_store import EventStore, InMemoryEventStore # logging logging.basicConfig() @@ -70,8 +71,9 @@ def __init__( byte_limit: Optional[int] = None, request_timeout: Optional[Union[float, Tuple[float, float]]] = None, max_retry_delay_seconds: int = 60, - buffer_capacity: int = 10000, - custom_retry_codes: Dict[int, bool] = {} + buffer_capacity: Optional[int] = None, + custom_retry_codes: Dict[int, bool] = {}, + event_store: Optional[EventStore] = None, ) -> None: """ :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. @@ -101,13 +103,15 @@ def __init__( :type request_timeout: float | tuple | None :param max_retry_delay_seconds: Set the maximum time between attempts to send failed events to the collector. Default 60 seconds :type max_retry_delay_seconds: int - :param buffer_capacity: The maximum capacity of the event buffer. The default buffer capacity is 10 000 events. + :param buffer_capacity: The maximum capacity of the event buffer. When the buffer is full new events are lost. - :type buffer_capacity: int + :type buffer_capacity: int :param custom_retry_codes: Set custom retry rules for HTTP status codes received in emit responses from the Collector. By default, retry will not occur for status codes 400, 401, 403, 410 or 422. This can be overridden here. Note that 2xx codes will never retry as they are considered successful. :type custom_retry_codes: dict + :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. 
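A short sketch of the two ways this buffering can be configured when building an Emitter directly; the endpoint and capacity values are placeholders:

    import logging
    from snowplow_tracker import Emitter
    from snowplow_tracker.event_store import InMemoryEventStore

    # Any standard Logger works; InMemoryEventStore only uses it to report dropped events.
    logger = logging.getLogger(__name__)

    # Option 1: let the emitter build its own InMemoryEventStore, capped at 5,000 events.
    bounded_emitter = Emitter("collector.example.com", buffer_capacity=5000)

    # Option 2: construct the event store yourself and hand it to the emitter.
    store = InMemoryEventStore(logger=logger, buffer_capacity=5000)
    custom_store_emitter = Emitter("collector.example.com", event_store=store)
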
+ :type event_store: EventStore | None """ one_of(protocol, PROTOCOLS) one_of(method, METHODS) @@ -116,16 +120,26 @@ def __init__( self.method = method + if event_store is None: + if buffer_capacity is None: + event_store = InMemoryEventStore(logger=logger) + else: + event_store = InMemoryEventStore( + buffer_capacity=buffer_capacity, logger=logger + ) + + self.event_store = event_store + if batch_size is None: if method == "post": batch_size = DEFAULT_MAX_LENGTH else: batch_size = 1 - - if batch_size > buffer_capacity: + + if buffer_capacity is not None and batch_size > buffer_capacity: batch_size = buffer_capacity + self.batch_size = batch_size - self.buffer = [] self.byte_limit = byte_limit self.bytes_queued = None if byte_limit is None else 0 self.request_timeout = request_timeout @@ -141,7 +155,6 @@ def __init__( self.max_retry_delay_seconds = max_retry_delay_seconds self.retry_delay = 0 - self.buffer_capacity = buffer_capacity self.custom_retry_codes = custom_retry_codes logger.info("Emitter initialized with endpoint " + self.endpoint) @@ -166,7 +179,7 @@ def as_collector_uri( if len(endpoint) < 1: raise ValueError("No endpoint provided.") - endpoint = endpoint.rstrip('/') + endpoint = endpoint.rstrip("/") if endpoint.split("://")[0] in PROTOCOLS: endpoint_arr = endpoint.split("://") @@ -195,9 +208,9 @@ def input(self, payload: PayloadDict) -> None: self.bytes_queued += len(str(payload)) if self.method == "post": - self.buffer.append({key: str(payload[key]) for key in payload}) + self.event_store.add_event({key: str(payload[key]) for key in payload}) else: - self.buffer.append(payload) + self.event_store.add_event(payload) if self.reached_limit(): self.flush() @@ -209,11 +222,11 @@ def reached_limit(self) -> bool: :rtype: bool """ if self.byte_limit is None: - return len(self.buffer) >= self.batch_size + return self.event_store.size() >= self.batch_size else: - return (self.bytes_queued or 0) >= self.byte_limit or len( - self.buffer - ) >= self.batch_size + return ( + self.bytes_queued or 0 + ) >= self.byte_limit or self.event_store.size() >= self.batch_size def flush(self) -> None: """ @@ -222,9 +235,7 @@ def flush(self) -> None: with self.lock: if self.retry_timer.is_active(): return - - send_events = self.buffer - self.buffer = [] + send_events = self.event_store.get_events_batch() self.send_events(send_events) if self.bytes_queued is not None: self.bytes_queued = 0 @@ -324,6 +335,7 @@ def send_events(self, evts: PayloadDictList) -> None: self._set_retry_delay() self._retry_failed_events(failure_events) else: + self.event_store.cleanup(success_events, False) self._reset_retry_delay() else: logger.info("Skipping flush since buffer is empty") @@ -339,9 +351,9 @@ def _set_retry_timer(self, timeout: float) -> None: def set_flush_timer(self, timeout: float) -> None: """ - Set an interval at which the buffer will be flushed - :param timeout: interval in seconds - :type timeout: int | float + Set an interval at which the buffer will be flushed + :param timeout: interval in seconds + :type timeout: int | float """ self.timer.start(timeout=timeout) @@ -370,11 +382,11 @@ def update(e: PayloadDict) -> None: def _should_retry(self, status_code: int) -> bool: """ - Checks if a request should be retried - - :param status_code: Response status code - :type status_code: int - :rtype: bool + Checks if a request should be retried + + :param status_code: Response status code + :type status_code: int + :rtype: bool """ if Emitter.is_good_status_code(status_code): return False @@ -386,41 +398,32 @@ 
def _should_retry(self, status_code: int) -> bool: def _set_retry_delay(self) -> None: """ - Sets a delay to retry failed events + Sets a delay to retry failed events """ random_noise = random.random() - self.retry_delay = min(self.retry_delay * 2 + random_noise, self.max_retry_delay_seconds) + self.retry_delay = min( + self.retry_delay * 2 + random_noise, self.max_retry_delay_seconds + ) def _reset_retry_delay(self) -> None: """ - Resets retry delay to 0 + Resets retry delay to 0 """ self.retry_delay = 0 def _retry_failed_events(self, failed_events) -> None: """ - Adds failed events back to the buffer to retry + Adds failed events back to the buffer to retry - :param failed_events: List of failed events - :type List + :param failed_events: List of failed events + :type List """ - for event in failed_events: - if not event in self.buffer and not self._buffer_capacity_reached(): - self.buffer.append(event) - + self.event_store.cleanup(failed_events, True) self._set_retry_timer(self.retry_delay) - def _buffer_capacity_reached(self) -> bool: - """ - Returns true if buffer capacity is reached - - :rtype: bool - """ - return len(self.buffer) >= self.buffer_capacity - def _cancel_retry_timer(self) -> None: """ - Cancels a retry timer + Cancels a retry timer """ self.retry_timer.cancel() @@ -442,7 +445,8 @@ def __init__( thread_count: int = 1, byte_limit: Optional[int] = None, max_retry_delay_seconds: int = 60, - buffer_capacity: int = 10000, + buffer_capacity: int = None, + event_store: Optional[EventStore] = None, ) -> None: """ :param endpoint: The collector URL. If protocol is not set in endpoint it will automatically set to "https://" - this is done automatically. @@ -470,9 +474,11 @@ def __init__( :type byte_limit: int | None :param max_retry_delay_seconds: Set the maximum time between attempts to send failed events to the collector. Default 60 seconds :type max_retry_delay_seconds: int - :param buffer_capacity: The maximum capacity of the event buffer. The default buffer capacity is 10,000 events. + :param buffer_capacity: The maximum capacity of the event buffer. When the buffer is full new events are lost. - :type buffer_capacity: int + :type buffer_capacity: int + :param event_store: Stores the event buffer and buffer capacity. Default is an InMemoryEventStore object with buffer_capacity of 10,000 events. + :type event_store: EventStore """ super(AsyncEmitter, self).__init__( endpoint, @@ -484,7 +490,8 @@ def __init__( on_failure, byte_limit, max_retry_delay_seconds, - buffer_capacity + buffer_capacity, + event_store, ) self.queue = Queue() for i in range(thread_count): @@ -496,7 +503,7 @@ def sync_flush(self) -> None: while True: self.flush() self.queue.join() - if len(self.buffer) < 1: + if self.event_store.size() < 1: break def flush(self) -> None: @@ -505,8 +512,7 @@ def flush(self) -> None: executes the flush method of the base Emitter class """ with self.lock: - self.queue.put(self.buffer) - self.buffer = [] + self.queue.put(self.event_store.get_events_batch()) if self.bytes_queued is not None: self.bytes_queued = 0 diff --git a/snowplow_tracker/event_store.py b/snowplow_tracker/event_store.py new file mode 100644 index 00000000..ad47ac5c --- /dev/null +++ b/snowplow_tracker/event_store.py @@ -0,0 +1,142 @@ +# """ +# event_store.py + +# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. 
You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. + +# Authors: Jack Keene, Anuj More, Alex Dean, Fred Blundun, Paul Boocock +# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd +# License: Apache License Version 2.0 +# """ + +from typing_extensions import Protocol +from snowplow_tracker.typing import PayloadDict, PayloadDictList +from logging import Logger + + +class EventStore(Protocol): + """ + EventStore protocol. For buffering events in the Emitter. + """ + + def add_event(payload: PayloadDict) -> bool: + """ + Add PayloadDict to buffer. Returns True if successful. + + :param payload: The payload to add + :type payload: PayloadDict + :rtype bool + """ + ... + + def get_events_batch() -> PayloadDictList: + """ + Get a list of all the PayloadDicts in the buffer. + + :rtype PayloadDictList + """ + ... + + def cleanup(batch: PayloadDictList, need_retry: bool) -> None: + """ + Removes sent events from the event store. If events need to be retried they are re-added to the buffer. + + :param batch: The events to be removed from the buffer + :type batch: PayloadDictList + :param need_retry Whether the events should be re-sent or not + :type need_retry bool + """ + ... + + def size() -> int: + """ + Returns the number of events in the buffer + + :rtype int + """ + ... + + +class InMemoryEventStore(EventStore): + """ + Create a InMemoryEventStore object with custom buffer capacity. The default is 10,000 events. + """ + + def __init__(self, logger: Logger, buffer_capacity: int = 10000) -> None: + """ + :param logger: Logging module + :type logger: Logger + :param buffer_capacity: The maximum capacity of the event buffer. + When the buffer is full new events are lost. + :type buffer_capacity int + """ + self.event_buffer = [] + self.buffer_capacity = buffer_capacity + self.logger = logger + + def add_event(self, payload: PayloadDict) -> bool: + """ + Add PayloadDict to buffer. + + :param payload: The payload to add + :type payload: PayloadDict + """ + if self._buffer_capacity_reached(): + self.logger.error("Event buffer is full, dropping event.") + return False + + self.event_buffer.append(payload) + return True + + def get_events_batch(self) -> PayloadDictList: + """ + Get a list of all the PayloadDicts in the in the buffer. + + :rtype PayloadDictList + """ + batch = self.event_buffer + self.event_buffer = [] + return batch + + def cleanup(self, batch: PayloadDictList, need_retry: bool) -> None: + """ + Removes sent events from the InMemoryEventStore buffer. If events need to be retried they are re-added to the buffer. 
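Because EventStore is a Protocol, a drop-in replacement only needs to provide the same four methods. A minimal, hypothetical example (not part of this patch) that evicts the oldest event instead of dropping new ones when full:

    from collections import deque
    from snowplow_tracker.typing import PayloadDict, PayloadDictList

    class NewestFirstEventStore:
        """Hypothetical EventStore that discards the oldest event when full."""

        def __init__(self, buffer_capacity: int = 10000) -> None:
            self.buffer = deque(maxlen=buffer_capacity)

        def add_event(self, payload: PayloadDict) -> bool:
            # A deque with maxlen silently drops its oldest entry when full.
            self.buffer.append(payload)
            return True

        def get_events_batch(self) -> PayloadDictList:
            batch = list(self.buffer)
            self.buffer.clear()
            return batch

        def cleanup(self, batch: PayloadDictList, need_retry: bool) -> None:
            if need_retry:
                # Put failed events back so the emitter can retry them later.
                for event in batch:
                    self.add_event(event)

        def size(self) -> int:
            return len(self.buffer)

An instance of such a class can then be supplied through EmitterConfiguration(event_store=...) or passed straight to an Emitter, just like the built-in InMemoryEventStore.
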
+ + :param batch: The events to be removed from the buffer + :type batch: PayloadDictList + :param need_retry Whether the events should be re-sent or not + :type need_retry bool + """ + if not need_retry: + return + + for event in batch: + if not event in self.event_buffer: + if not self.add_event(event): + return + + def size(self) -> int: + """ + Returns the number of events in the buffer + + :rtype int + """ + return len(self.event_buffer) + + def _buffer_capacity_reached(self) -> bool: + """ + Returns true if buffer capacity is reached + + :rtype: bool + """ + return self.size() >= self.buffer_capacity diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py index 946802e7..88f1e878 100644 --- a/snowplow_tracker/snowplow.py +++ b/snowplow_tracker/snowplow.py @@ -82,7 +82,7 @@ def create_tracker( on_failure=emitter_config.on_failure, byte_limit=emitter_config.byte_limit, request_timeout=emitter_config.request_timeout, - custom_retry_codes=emitter_config.custom_retry_codes + custom_retry_codes=emitter_config.custom_retry_codes, ) tracker = Tracker( diff --git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py index 439b66a8..c52eb0b2 100644 --- a/snowplow_tracker/test/unit/test_emitters.py +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -65,7 +65,7 @@ def test_init(self) -> None: self.assertEqual(e.endpoint, 'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') self.assertEqual(e.method, 'post') self.assertEqual(e.batch_size, 10) - self.assertEqual(e.buffer, []) + self.assertEqual(e.event_store.event_buffer, []) self.assertIsNone(e.byte_limit) self.assertIsNone(e.bytes_queued) self.assertIsNone(e.on_success) @@ -125,8 +125,8 @@ def test_input_no_flush(self, mok_flush: Any) -> None: nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) - self.assertEqual(len(e.buffer), 1) - self.assertDictEqual(nvPairs, e.buffer[0]) + self.assertEqual(len(e.event_store.event_buffer), 1) + self.assertDictEqual(nvPairs, e.event_store.event_buffer[0]) self.assertIsNone(e.byte_limit) self.assertFalse(e.reached_limit()) mok_flush.assert_not_called() @@ -139,8 +139,8 @@ def test_input_flush_byte_limit(self, mok_flush: Any) -> None: nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) - self.assertEqual(len(e.buffer), 1) - self.assertDictEqual(nvPairs, e.buffer[0]) + self.assertEqual(len(e.event_store.event_buffer), 1) + self.assertDictEqual(nvPairs, e.event_store.event_buffer[0]) self.assertTrue(e.reached_limit()) self.assertEqual(mok_flush.call_count, 1) @@ -152,14 +152,14 @@ def test_input_flush_buffer(self, mok_flush: Any) -> None: nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) - self.assertEqual(len(e.buffer), 1) + self.assertEqual(len(e.event_store.event_buffer), 1) self.assertFalse(e.reached_limit()) - self.assertDictEqual(nvPairs, e.buffer[0]) + self.assertDictEqual(nvPairs, e.event_store.event_buffer[0]) nextPairs = {"n0": "v0"} e.input(nextPairs) # since we mock flush, the buffer is not empty - self.assertEqual(e.buffer, [nvPairs, nextPairs]) + self.assertEqual(e.event_store.event_buffer, [nvPairs, nextPairs]) self.assertTrue(e.reached_limit()) self.assertEqual(mok_flush.call_count, 1) @@ -171,7 +171,7 @@ def test_input_bytes_queued(self, mok_flush: Any) -> None: nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) - self.assertEqual(len(e.buffer), 1) + self.assertEqual(len(e.event_store.event_buffer), 1) self.assertEqual(e.bytes_queued, 24) e.input(nvPairs) @@ -185,11 +185,11 @@ def test_input_bytes_post(self, mok_flush: Any) -> None: 
nvPairs = {"testString": "test", "testNum": 2.72} e.input(nvPairs) - self.assertEqual(e.buffer, [{"testString": "test", "testNum": "2.72"}]) + self.assertEqual(e.event_store.event_buffer, [{"testString": "test", "testNum": "2.72"}]) - @mock.patch('snowplow_tracker.Emitter.send_events') + @mock.patch('snowplow_tracker.Emitter.http_post') def test_flush(self, mok_send_events: Any) -> None: - mok_send_events.side_effect = mocked_send_events + mok_send_events.side_effect = mocked_http_response_success e = Emitter('0.0.0.0', batch_size=2, byte_limit=None) nvPairs = {"n": "v"} @@ -197,11 +197,11 @@ def test_flush(self, mok_send_events: Any) -> None: e.input(nvPairs) self.assertEqual(mok_send_events.call_count, 1) - self.assertEqual(len(e.buffer), 0) + self.assertEqual(len(e.event_store.event_buffer), 0) - @mock.patch('snowplow_tracker.Emitter.send_events') + @mock.patch('snowplow_tracker.Emitter.http_post') def test_flush_bytes_queued(self, mok_send_events: Any) -> None: - mok_send_events.side_effect = mocked_send_events + mok_send_events.side_effect = mocked_http_response_success e = Emitter('0.0.0.0', batch_size=2, byte_limit=256) nvPairs = {"n": "v"} @@ -209,7 +209,7 @@ def test_flush_bytes_queued(self, mok_send_events: Any) -> None: e.input(nvPairs) self.assertEqual(mok_send_events.call_count, 1) - self.assertEqual(len(e.buffer), 0) + self.assertEqual(len(e.event_store.event_buffer), 0) self.assertEqual(e.bytes_queued, 0) @freeze_time("2021-04-14 00:00:02") # unix: 1618358402000 @@ -233,7 +233,7 @@ def test_flush_timer(self, mok_flush: Any) -> None: e.input(i) e.set_flush_timer(3) - self.assertEqual(len(e.buffer), 3) + self.assertEqual(len(e.event_store.event_buffer), 3) time.sleep(5) self.assertGreaterEqual(mok_flush.call_count, 1) @@ -318,7 +318,7 @@ def test_async_emitter_input(self, mok_flush: Any) -> None: ae.input({"a": "aa"}) ae.input({"b": "bb"}) - self.assertEqual(len(ae.buffer), 2) + self.assertEqual(len(ae.event_store.event_buffer), 2) self.assertTrue(ae.queue.empty()) mok_flush.assert_not_called() @@ -334,12 +334,12 @@ def test_async_emitter_sync_flash(self, mok_send_events: Any) -> None: ae.input({"a": "aa"}) ae.input({"b": "bb"}) - self.assertEqual(len(ae.buffer), 2) + self.assertEqual(len(ae.event_store.event_buffer), 2) self.assertTrue(ae.queue.empty()) mok_send_events.assert_not_called() ae.sync_flush() - self.assertEqual(len(ae.buffer), 0) + self.assertEqual(len(ae.event_store.event_buffer), 0) self.assertEqual(ae.bytes_queued, 0) self.assertEqual(mok_send_events.call_count, 1) @@ -404,8 +404,8 @@ def test_input_unicode_get(self, mok_flush: Any) -> None: ae = AsyncEmitter('0.0.0.0', method="get", batch_size=2) ae.input(payload) - self.assertEqual(len(ae.buffer), 1) - self.assertDictEqual(payload, ae.buffer[0]) + self.assertEqual(len(ae.event_store.event_buffer), 1) + self.assertDictEqual(payload, ae.event_store.event_buffer[0]) @mock.patch('snowplow_tracker.AsyncEmitter.flush') def test_input_unicode_post(self, mok_flush: Any) -> None: @@ -415,8 +415,8 @@ def test_input_unicode_post(self, mok_flush: Any) -> None: ae = AsyncEmitter('0.0.0.0', batch_size=2) ae.input(payload) - self.assertEqual(len(ae.buffer), 1) - self.assertDictEqual(payload, ae.buffer[0]) + self.assertEqual(len(ae.event_store.event_buffer), 1) + self.assertDictEqual(payload, ae.event_store.event_buffer[0]) @mock.patch('snowplow_tracker.Emitter.http_post') def test_send_events_post_retry(self, mok_http_post: Any) -> None: diff --git a/snowplow_tracker/test/unit/test_in_memory_event_store.py 
b/snowplow_tracker/test/unit/test_in_memory_event_store.py new file mode 100644 index 00000000..90c9e887 --- /dev/null +++ b/snowplow_tracker/test/unit/test_in_memory_event_store.py @@ -0,0 +1,110 @@ +# """ +# test_in_memory_event_store.py + +# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. + +# This program is licensed to you under the Apache License Version 2.0, +# and you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. + +# Unless required by applicable law or agreed to in writing, +# software distributed under the Apache License Version 2.0 is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the Apache License Version 2.0 for the specific +# language governing permissions and limitations there under. + +# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock +# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd +# License: Apache License Version 2.0 +# """ + +import unittest +from snowplow_tracker.event_store import InMemoryEventStore +import logging + +# logging +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class TestInMemoryEventStore(unittest.TestCase): + def setUp(self) -> None: + pass + + def test_init(self): + event_store = InMemoryEventStore(logger) + self.assertEqual(event_store.buffer_capacity, 10000) + self.assertEqual(event_store.event_buffer, []) + + def test_add_event(self): + event_store = InMemoryEventStore(logger) + nvPairs = {"n0": "v0", "n1": "v1"} + + event_store.add_event(nvPairs) + self.assertDictEqual(nvPairs, event_store.event_buffer[0]) + + def test_size(self): + event_store = InMemoryEventStore(logger) + nvPairs = {"n0": "v0", "n1": "v1"} + + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + + self.assertEqual(event_store.size(), 3) + + def test_add_failed_events_to_buffer(self): + event_store = InMemoryEventStore(logger) + + nvPair1 = {"n0": "v0", "n1": "v1"} + nvPair2 = {"n2": "v2", "n3": "v3"} + + event_store.add_event(nvPair1) + event_store.add_event(nvPair2) + + payload_list = event_store.get_events_batch() + + event_store.cleanup(payload_list, True) + + self.assertEqual(event_store.event_buffer, payload_list) + + def test_remove_success_events_from_buffer(self): + event_store = InMemoryEventStore(logger) + + nvPairs = {"n0": "v0", "n1": "v1"} + + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + payload_list = event_store.get_events_batch() + event_store.cleanup(payload_list, False) + + self.assertEqual(event_store.event_buffer, []) + + def test_drop_new_events_buffer_full(self): + event_store = InMemoryEventStore(logger, buffer_capacity=2) + + nvPair1 = {"n0": "v0"} + nvPair2 = {"n1": "v1"} + nvPair3 = {"n2": "v2"} + + event_store.add_event(nvPair1) + event_store.add_event(nvPair2) + + self.assertEqual(event_store.event_buffer, [{"n0": "v0"}, {"n1": "v1"}]) + + event_store.add_event(nvPair3) + + self.assertEqual(event_store.event_buffer, [{"n0": "v0"}, {"n1": "v1"}]) + + def test_get_events(self): + event_store = InMemoryEventStore(logger, buffer_capacity=2) + + nvPairs = {"n0": "v0"} + batch = [nvPairs, nvPairs] + + event_store.add_event(nvPairs) + event_store.add_event(nvPairs) + + self.assertEqual(event_store.get_events_batch(), batch) diff --git a/snowplow_tracker/tracker.py 
b/snowplow_tracker/tracker.py index 0215bf16..3a6ee7cd 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -311,7 +311,9 @@ def track_link_click( "%s/link_click/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) def track_add_to_cart( self, @@ -364,7 +366,9 @@ def track_add_to_cart( "%s/add_to_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) def track_remove_from_cart( self, @@ -417,7 +421,9 @@ def track_remove_from_cart( "%s/remove_from_cart/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) def track_form_change( self, @@ -471,7 +477,9 @@ def track_form_change( "%s/change_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) def track_form_submit( self, @@ -512,7 +520,9 @@ def track_form_submit( "%s/submit_form/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) def track_site_search( self, @@ -556,7 +566,9 @@ def track_site_search( "%s/site_search/%s/1-0-0" % (BASE_SCHEMA_PATH, SCHEMA_TAG), properties ) - return self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) def track_ecommerce_transaction_item( self, @@ -718,7 +730,9 @@ def track_screen_view( screen_view_properties, ) - return self.track_self_describing_event(event_json, context, tstamp, event_subject) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) def track_struct_event( self, @@ -817,9 +831,15 @@ def track_unstruct_event( :param event_subject: Optional per event subject :type event_subject: subject | None :rtype: tracker - """ - warn('track_unstruct_event will be deprecated in future versions. Please use track_self_describing_event.', DeprecationWarning, stacklevel=2) - return self.track_self_describing_event(event_json, context, tstamp, event_subject) + """ + warn( + "track_unstruct_event will be deprecated in future versions. 
Please use track_self_describing_event.", + DeprecationWarning, + stacklevel=2, + ) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) def flush(self, is_async: bool = False) -> "Tracker": """ From 6cf399ae4eacff1a209d493fb20584be5657b557 Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Mon, 23 Jan 2023 09:45:40 +0000 Subject: [PATCH 09/11] Refactor ScreenView using a single schema for all the trackers (#close 303) PR #311 * Add deprecation warning to track_screen_view() * Add track_mobile_screen_view() * Add mobile screen schema --- .../test/integration/test_integration.py | 18 ++--- snowplow_tracker/test/unit/test_tracker.py | 37 +++++++++- snowplow_tracker/tracker.py | 70 +++++++++++++++++++ 3 files changed, 114 insertions(+), 11 deletions(-) diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index b9ec214d..7e931388 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -125,10 +125,10 @@ def test_integration_ecommerce_transaction(self) -> None: self.assertEqual(from_querystring("ttm", querystrings[-3]), from_querystring("ttm", querystrings[-2])) - def test_integration_screen_view(self) -> None: + def test_integration_mobile_screen_view(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) with HTTMock(pass_response_content): - t.track_screen_view("Game HUD 2", id_="534") + t.track_mobile_screen_view("534", "Game HUD 2") expected_fields = {"e": "ue"} for key in expected_fields: self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) @@ -137,10 +137,10 @@ def test_integration_screen_view(self) -> None: self.assertEqual(envelope, { "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", "data": { - "schema": "iglu:com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0", + "schema": "iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0", "data": { - "name": "Game HUD 2", - "id": "534" + "id": "534", + "name": "Game HUD 2" } } }) @@ -380,7 +380,7 @@ def test_unicode_get(self) -> None: test_ctx = SelfDescribingJson('iglu:a.b/c/jsonschema/1-0-0', {'test': unicode_a}) with HTTMock(pass_response_content): t.track_page_view(unicode_b, context=[test_ctx]) - t.track_screen_view(unicode_b, context=[test_ctx]) + t.track_mobile_screen_view(unicode_b, context=[test_ctx]) url_string = unquote_plus(from_querystring("url", querystrings[-2])) try: @@ -394,7 +394,7 @@ def test_unicode_get(self) -> None: self.assertEqual(actual_a, unicode_a) uepr_string = unquote_plus(from_querystring("ue_pr", querystrings[-1])) - actual_b = json.loads(uepr_string)['data']['data']['name'] + actual_b = json.loads(uepr_string)['data']['data']['id'] self.assertEqual(actual_b, unicode_b) def test_unicode_post(self) -> None: @@ -404,7 +404,7 @@ def test_unicode_post(self) -> None: test_ctx = SelfDescribingJson('iglu:a.b/c/jsonschema/1-0-0', {'test': unicode_a}) with HTTMock(pass_post_response_content): t.track_page_view(unicode_b, context=[test_ctx]) - t.track_screen_view(unicode_b, context=[test_ctx]) + t.track_mobile_screen_view(unicode_b, context=[test_ctx]) pv_event = querystrings[-2] self.assertEqual(pv_event['data'][0]['url'], unicode_b) @@ -413,5 +413,5 @@ def test_unicode_post(self) -> None: self.assertEqual(in_test_ctx, unicode_a) sv_event = querystrings[-1] - in_uepr_name = 
json.loads(sv_event['data'][0]['ue_pr'])['data']['data']['name'] + in_uepr_name = json.loads(sv_event['data'][0]['ue_pr'])['data']['data']['id'] self.assertEqual(in_uepr_name, unicode_b) diff --git a/snowplow_tracker/test/unit/test_tracker.py b/snowplow_tracker/test/unit/test_tracker.py index 5f361ae8..91a2eff5 100644 --- a/snowplow_tracker/test/unit/test_tracker.py +++ b/snowplow_tracker/test/unit/test_tracker.py @@ -44,6 +44,9 @@ FORM_CHANGE_SCHEMA = "iglu:com.snowplowanalytics.snowplow/change_form/jsonschema/1-0-0" FORM_SUBMIT_SCHEMA = "iglu:com.snowplowanalytics.snowplow/submit_form/jsonschema/1-0-0" SITE_SEARCH_SCHEMA = "iglu:com.snowplowanalytics.snowplow/site_search/jsonschema/1-0-0" +MOBILE_SCREEN_VIEW_SCHEMA = ( + "iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0" +) SCREEN_VIEW_SCHEMA = "iglu:com.snowplowanalytics.snowplow/screen_view/jsonschema/1-0-0" # helpers @@ -474,7 +477,9 @@ def test_track_self_describing_event(self, mok_complete_payload: Any) -> None: self.assertTrue(actualTstampArg is None) @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_self_describing_event_all_args(self, mok_complete_payload: Any) -> None: + def test_track_self_describing_event_all_args( + self, mok_complete_payload: Any + ) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -510,7 +515,9 @@ def test_track_self_describing_event_all_args(self, mok_complete_payload: Any) - self.assertEqual(actualTstampArg, evTstamp) @mock.patch("snowplow_tracker.Tracker.complete_payload") - def test_track_self_describing_event_encode(self, mok_complete_payload: Any) -> None: + def test_track_self_describing_event_encode( + self, mok_complete_payload: Any + ) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") e = mokEmitter() @@ -1286,6 +1293,32 @@ def test_track_site_search_optional_none(self, mok_track_unstruct: Any) -> None: self.assertTrue(callArgs[1] is None) self.assertTrue(callArgs[2] is None) + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") + def test_track_mobile_screen_view(self, mok_track_unstruct: Any) -> None: + mokEmitter = self.create_patch("snowplow_tracker.Emitter") + e = mokEmitter() + + mok_track_unstruct.side_effect = mocked_track_unstruct + + t = Tracker(e) + ctx = SelfDescribingJson("test.context.schema", {"user": "tester"}) + evTstamp = 1399021242030 + + t.track_mobile_screen_view( + "screenId", "screenName", context=[ctx], tstamp=evTstamp + ) + + expected = { + "schema": MOBILE_SCREEN_VIEW_SCHEMA, + "data": {"name": "screenName", "id": "screenId"}, + } + + callArgs = mok_track_unstruct.call_args_list[0][0] + self.assertEqual(len(callArgs), 4) + self.assertDictEqual(callArgs[0].to_json(), expected) + self.assertIs(callArgs[1][0], ctx) + self.assertEqual(callArgs[2], evTstamp) + @mock.patch("snowplow_tracker.Tracker.track_self_describing_event") def test_track_screen_view(self, mok_track_unstruct: Any) -> None: mokEmitter = self.create_patch("snowplow_tracker.Emitter") diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index 3a6ee7cd..7791b103 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -44,6 +44,7 @@ VERSION = "py-%s" % _version.__version__ DEFAULT_ENCODE_BASE64 = True BASE_SCHEMA_PATH = "iglu:com.snowplowanalytics.snowplow" +MOBILE_SCHEMA_PATH = "iglu:com.snowplowanalytics.mobile" SCHEMA_TAG = "jsonschema" CONTEXT_SCHEMA = "%s/contexts/%s/1-0-1" % (BASE_SCHEMA_PATH, SCHEMA_TAG) UNSTRUCT_EVENT_SCHEMA = "%s/unstruct_event/%s/1-0-0" % 
(BASE_SCHEMA_PATH, SCHEMA_TAG) @@ -719,6 +720,11 @@ def track_screen_view( :type event_subject: subject | None :rtype: tracker """ + warn( + "track_screen_view will be deprecated in future versions. Please use track_mobile_screen_view.", + DeprecationWarning, + stacklevel=2, + ) screen_view_properties = {} if name is not None: screen_view_properties["name"] = name @@ -734,6 +740,70 @@ def track_screen_view( event_json, context, tstamp, event_subject ) + def track_mobile_screen_view( + self, + id_: Optional[str] = None, + name: Optional[str] = None, + type: Optional[str] = None, + previous_name: Optional[str] = None, + previous_id: Optional[str] = None, + previous_type: Optional[str] = None, + transition_type: Optional[str] = None, + context: Optional[List[SelfDescribingJson]] = None, + tstamp: Optional[float] = None, + event_subject: Optional[_subject.Subject] = None, + ) -> "Tracker": + """ + :param id_: Screen view ID. This must be of type UUID. + :type id_: string | None + :param name: The name of the screen view event + :type name: string_or_none + :param type: The type of screen that was viewed e.g feed / carousel. + :type type: string | None + :param previous_name: The name of the previous screen. + :type previous_name: string | None + :param previous_id: The screenview ID of the previous screenview. + :type previous_id: string | None + :param previous_type The screen type of the previous screenview + :type previous_type string | None + :param transition_type The type of transition that led to the screen being viewed. + :type transition_type string | None + :param context: Custom context for the event + :type context: context_array | None + :param tstamp: Optional event timestamp in milliseconds + :type tstamp: int | float | None + :param event_subject: Optional per event subject + :type event_subject: subject | None + :rtype: tracker + """ + screen_view_properties = {} + + if id_ is None: + id_ = self.get_uuid() + + screen_view_properties["id"] = id_ + + if name is not None: + screen_view_properties["name"] = name + if type is not None: + screen_view_properties["type"] = type + if previous_name is not None: + screen_view_properties["previousName"] = previous_name + if previous_id is not None: + screen_view_properties["previousId"] = previous_id + if previous_type is not None: + screen_view_properties["previousType"] = previous_type + if transition_type is not None: + screen_view_properties["transitionType"] = transition_type + + event_json = SelfDescribingJson( + "%s/screen_view/%s/1-0-0" % (MOBILE_SCHEMA_PATH, SCHEMA_TAG), + screen_view_properties, + ) + return self.track_self_describing_event( + event_json, context, tstamp, event_subject + ) + def track_struct_event( self, category: str, From 1771798f0046243e93886ca28e2d827d4cab9d2e Mon Sep 17 00:00:00 2001 From: Jack Keene <87364579+Jack-Keene@users.noreply.github.com> Date: Mon, 23 Jan 2023 10:32:59 +0000 Subject: [PATCH 10/11] Bump copyright to 2023 (close #313) PR #314 * Update copyright notices for 2023 * Remove author list from copyright notice --- LICENSE | 2 +- README.md | 2 +- docs/source/conf.py | 2 +- setup.py | 8 +- snowplow_tracker/_version.py | 6 +- snowplow_tracker/celery/celery_emitter.py | 43 +- snowplow_tracker/contracts.py | 6 +- snowplow_tracker/emitter_configuration.py | 6 +- snowplow_tracker/emitters.py | 7 +- snowplow_tracker/event_store.py | 6 +- snowplow_tracker/payload.py | 6 +- snowplow_tracker/redis/redis_emitter.py | 35 +- snowplow_tracker/redis/redis_worker.py | 33 +- 
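[Reviewer note, not part of the patch: a minimal sketch of the new API added above; the collector endpoint and all field values are placeholders. track_mobile_screen_view sends the com.snowplowanalytics.mobile screen_view 1-0-0 schema and generates a UUID for id_ when none is given, while track_screen_view now emits a deprecation warning and keeps using the old snowplow schema.]

from snowplow_tracker import Tracker, Emitter

t = Tracker([Emitter("collector.example.com")])

# id_ is optional; the tracker generates a UUID when it is omitted
t.track_mobile_screen_view(
    name="Game HUD 2",
    type="feed",
    previous_name="Game HUD 1",
    transition_type="push",
)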
snowplow_tracker/self_describing_json.py | 6 +- snowplow_tracker/snowplow.py | 7 +- snowplow_tracker/subject.py | 6 +- .../test/integration/test_integration.py | 423 +++++++++++++----- snowplow_tracker/test/unit/test_contracts.py | 57 +-- snowplow_tracker/test/unit/test_emitters.py | 257 +++++++---- .../test/unit/test_in_memory_event_store.py | 6 +- snowplow_tracker/test/unit/test_payload.py | 70 +-- snowplow_tracker/test/unit/test_subject.py | 15 +- snowplow_tracker/test/unit/test_tracker.py | 6 +- snowplow_tracker/tracker.py | 6 +- snowplow_tracker/tracker_configuration.py | 6 +- snowplow_tracker/typing.py | 6 +- 26 files changed, 610 insertions(+), 423 deletions(-) diff --git a/LICENSE b/LICENSE index ec02d7cc..db047f7e 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2013-2022 Snowplow Analytics Ltd. + Copyright 2013-2023 Snowplow Analytics Ltd. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index 114aecb7..16fd5b51 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ Assuming [docker](https://www.docker.com/) is installed Copyright and license --------------------- -The Snowplow Python Tracker is copyright 2013-2022 Snowplow Analytics +The Snowplow Python Tracker is copyright 2013-2023 Snowplow Analytics Ltd. Licensed under the [Apache License, Version diff --git a/docs/source/conf.py b/docs/source/conf.py index d906b8be..f18cc0eb 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -24,7 +24,7 @@ # -- Project information ----------------------------------------------------- project = 'Snowplow Python Tracker' -copyright = '2022, Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' +copyright = "2023, Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene" author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags diff --git a/setup.py b/setup.py index cbb9e7cc..e12f7835 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ # # setup.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,11 +13,7 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 -# +# """ #!/usr/bin/env python # -*- coding: utf-8 -*- diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index d77b9f6d..aa7e4852 100644 --- a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -1,7 +1,7 @@ # """ # _version.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. 
See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ __version_info__ = (0, 12, 0) diff --git a/snowplow_tracker/celery/celery_emitter.py b/snowplow_tracker/celery/celery_emitter.py index 682ee24b..bb1a1b96 100644 --- a/snowplow_tracker/celery/celery_emitter.py +++ b/snowplow_tracker/celery/celery_emitter.py @@ -1,7 +1,7 @@ # """ # celery_emitter.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import logging @@ -39,27 +35,32 @@ class CeleryEmitter(Emitter): """ - Uses a Celery worker to send HTTP requests asynchronously. - Works like the base Emitter class, - but on_success and on_failure callbacks cannot be set. + Uses a Celery worker to send HTTP requests asynchronously. + Works like the base Emitter class, + but on_success and on_failure callbacks cannot be set. """ + if _CELERY_OPT: celery_app = None def __init__( - self, - endpoint: str, - protocol: HttpProtocol = "http", - port: Optional[int] = None, - method: Method = "post", - batch_size: Optional[int] = None, - byte_limit: Optional[int] = None) -> None: - super(CeleryEmitter, self).__init__(endpoint, protocol, port, method, batch_size, None, None, byte_limit) + self, + endpoint: str, + protocol: HttpProtocol = "http", + port: Optional[int] = None, + method: Method = "post", + batch_size: Optional[int] = None, + byte_limit: Optional[int] = None, + ) -> None: + super(CeleryEmitter, self).__init__( + endpoint, protocol, port, method, batch_size, None, None, byte_limit + ) try: # Check whether a custom Celery configuration module named "snowplow_celery_config" exists import snowplow_celery_config + self.celery_app = Celery() self.celery_app.config_from_object(snowplow_celery_config) except ImportError: @@ -80,6 +81,10 @@ def async_flush(self) -> None: else: - def __new__(cls, *args: Any, **kwargs: Any) -> 'CeleryEmitter': - logger.error("CeleryEmitter is not available. Please install snowplow-tracker with celery extra dependency.") - raise RuntimeError('CeleryEmitter is not available. To use: `pip install snowplow-tracker[celery]`') + def __new__(cls, *args: Any, **kwargs: Any) -> "CeleryEmitter": + logger.error( + "CeleryEmitter is not available. Please install snowplow-tracker with celery extra dependency." + ) + raise RuntimeError( + "CeleryEmitter is not available. To use: `pip install snowplow-tracker[celery]`" + ) diff --git a/snowplow_tracker/contracts.py b/snowplow_tracker/contracts.py index 7a9253af..c54ac668 100644 --- a/snowplow_tracker/contracts.py +++ b/snowplow_tracker/contracts.py @@ -1,7 +1,7 @@ # """ # contracts.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
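[Reviewer note, not part of the patch: a sketch of how the reformatted CeleryEmitter is wired up, assuming the celery extra is installed; the endpoint is a placeholder, and an optional snowplow_celery_config module is picked up automatically if it exists on the path.]

# pip install snowplow-tracker[celery]
from snowplow_tracker import Tracker
from snowplow_tracker.celery.celery_emitter import CeleryEmitter

# on_success / on_failure cannot be set on this emitter; flushed batches are
# handed to a Celery worker, which performs the HTTP requests asynchronously.
e = CeleryEmitter("collector.example.com", protocol="https", batch_size=10)
t = Tracker([e])
t.track_page_view("http://www.example.com")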
# This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock, Matus Tomlein -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import traceback diff --git a/snowplow_tracker/emitter_configuration.py b/snowplow_tracker/emitter_configuration.py index 9b636d02..87fa6c9e 100644 --- a/snowplow_tracker/emitter_configuration.py +++ b/snowplow_tracker/emitter_configuration.py @@ -1,7 +1,7 @@ # """ # emitter_configuration.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Jack Keene, Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ from typing import Optional, Union, Tuple, Dict diff --git a/snowplow_tracker/emitters.py b/snowplow_tracker/emitters.py index d6ac1ef2..760fc398 100644 --- a/snowplow_tracker/emitters.py +++ b/snowplow_tracker/emitters.py @@ -1,7 +1,7 @@ # """ # emitters.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,13 +13,8 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ - import logging import time import threading diff --git a/snowplow_tracker/event_store.py b/snowplow_tracker/event_store.py index ad47ac5c..898f92ff 100644 --- a/snowplow_tracker/event_store.py +++ b/snowplow_tracker/event_store.py @@ -1,7 +1,7 @@ # """ # event_store.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Jack Keene, Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ from typing_extensions import Protocol diff --git a/snowplow_tracker/payload.py b/snowplow_tracker/payload.py index aacc6243..26e3262c 100644 --- a/snowplow_tracker/payload.py +++ b/snowplow_tracker/payload.py @@ -1,7 +1,7 @@ # """ # payload.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import json diff --git a/snowplow_tracker/redis/redis_emitter.py b/snowplow_tracker/redis/redis_emitter.py index a24ee50b..bb4cf5f4 100644 --- a/snowplow_tracker/redis/redis_emitter.py +++ b/snowplow_tracker/redis/redis_emitter.py @@ -1,7 +1,7 @@ # """ # redis_emitter.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import json @@ -38,16 +34,19 @@ class RedisEmitter(object): """ - Sends Snowplow events to a Redis database + Sends Snowplow events to a Redis database """ + if _REDIS_OPT: - def __init__(self, rdb: Optional[RedisProtocol] = None, key: str = "snowplow") -> None: + def __init__( + self, rdb: Optional[RedisProtocol] = None, key: str = "snowplow" + ) -> None: """ - :param rdb: Optional custom Redis database - :type rdb: redis | None - :param key: The Redis key for the list of events - :type key: string + :param rdb: Optional custom Redis database + :type rdb: redis | None + :param key: The Redis key for the list of events + :type key: string """ if rdb is None: rdb = redis.StrictRedis() @@ -57,8 +56,8 @@ def __init__(self, rdb: Optional[RedisProtocol] = None, key: str = "snowplow") - def input(self, payload: PayloadDict) -> None: """ - :param payload: The event properties - :type payload: dict(string:*) + :param payload: The event properties + :type payload: dict(string:*) """ logger.debug("Pushing event to Redis queue...") self.rdb.rpush(self.key, json.dumps(payload)) @@ -72,6 +71,10 @@ def sync_flush(self) -> None: else: - def __new__(cls, *args: Any, **kwargs: Any) -> 'RedisEmitter': - logger.error("RedisEmitter is not available. Please install snowplow-tracker with redis extra dependency.") - raise RuntimeError('RedisEmitter is not available. 
To use: `pip install snowplow-tracker[redis]`') + def __new__(cls, *args: Any, **kwargs: Any) -> "RedisEmitter": + logger.error( + "RedisEmitter is not available. Please install snowplow-tracker with redis extra dependency." + ) + raise RuntimeError( + "RedisEmitter is not available. To use: `pip install snowplow-tracker[redis]`" + ) diff --git a/snowplow_tracker/redis/redis_worker.py b/snowplow_tracker/redis/redis_worker.py index 3f1e9f51..e0e9353f 100644 --- a/snowplow_tracker/redis/redis_worker.py +++ b/snowplow_tracker/redis/redis_worker.py @@ -1,7 +1,7 @@ # """ # redis_worker.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,13 +13,8 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ - import json import signal from typing import Any, Optional @@ -39,11 +34,17 @@ class RedisWorker(object): """ - Asynchronously take events from redis and send them to an emitter + Asynchronously take events from redis and send them to an emitter """ + if _REDIS_OPT: - def __init__(self, emitter: EmitterProtocol, rdb: Optional[RedisProtocol] = None, key: str = DEFAULT_KEY) -> None: + def __init__( + self, + emitter: EmitterProtocol, + rdb: Optional[RedisProtocol] = None, + key: str = DEFAULT_KEY, + ) -> None: self.emitter = emitter self.key = key if rdb is None: @@ -57,14 +58,14 @@ def __init__(self, emitter: EmitterProtocol, rdb: Optional[RedisProtocol] = None def send(self, payload: PayloadDict) -> None: """ - Send an event to an emitter + Send an event to an emitter """ self.emitter.input(payload) def pop_payload(self) -> None: """ - Get a single event from Redis and send it - If the Redis queue is empty, sleep to avoid making continual requests + Get a single event from Redis and send it + If the Redis queue is empty, sleep to avoid making continual requests """ payload = self.rdb.lpop(self.key) if payload: @@ -74,7 +75,7 @@ def pop_payload(self) -> None: def run(self) -> None: """ - Run indefinitely + Run indefinitely """ self._shutdown = False @@ -84,11 +85,13 @@ def run(self) -> None: def request_shutdown(self, *args: Any) -> None: """ - Halt the worker + Halt the worker """ self._shutdown = True else: - def __new__(cls, *args: Any, **kwargs: Any) -> 'RedisWorker': - raise RuntimeError('RedisWorker is not available. To use: `pip install snowplow-tracker[redis]`') + def __new__(cls, *args: Any, **kwargs: Any) -> "RedisWorker": + raise RuntimeError( + "RedisWorker is not available. To use: `pip install snowplow-tracker[redis]`" + ) diff --git a/snowplow_tracker/self_describing_json.py b/snowplow_tracker/self_describing_json.py index 538408ca..b386c039 100644 --- a/snowplow_tracker/self_describing_json.py +++ b/snowplow_tracker/self_describing_json.py @@ -1,7 +1,7 @@ # """ # self_describing_json.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
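[Reviewer note, not part of the patch: a sketch of the Redis pipeline that the reformatted RedisEmitter and RedisWorker implement, assuming the redis extra is installed and a Redis instance is reachable with the default connection settings; the collector endpoint is a placeholder.]

# pip install snowplow-tracker[redis]
from snowplow_tracker import Tracker, Emitter
from snowplow_tracker.redis.redis_emitter import RedisEmitter
from snowplow_tracker.redis.redis_worker import RedisWorker

# Producer side: the tracker pushes each event payload onto a Redis list
# (rdb defaults to redis.StrictRedis() when not supplied).
t = Tracker([RedisEmitter(key="snowplow")])
t.track_page_view("http://www.example.com")

# Consumer side (typically a separate process): pop payloads off the same
# list and forward them to a regular HTTP emitter.
worker = RedisWorker(Emitter("collector.example.com"), key="snowplow")
worker.run()  # loops until request_shutdown() is called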
# This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import json diff --git a/snowplow_tracker/snowplow.py b/snowplow_tracker/snowplow.py index 88f1e878..92e96461 100644 --- a/snowplow_tracker/snowplow.py +++ b/snowplow_tracker/snowplow.py @@ -1,7 +1,7 @@ # """ # snowplow.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,11 +13,8 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Jack Keene, Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ + import logging from typing import Optional from snowplow_tracker import ( diff --git a/snowplow_tracker/subject.py b/snowplow_tracker/subject.py index b5f3de83..10bcbe2a 100644 --- a/snowplow_tracker/subject.py +++ b/snowplow_tracker/subject.py @@ -1,7 +1,7 @@ # """ # subject.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ from snowplow_tracker.contracts import one_of, greater_than diff --git a/snowplow_tracker/test/integration/test_integration.py b/snowplow_tracker/test/integration/test_integration.py index 7e931388..7758a266 100644 --- a/snowplow_tracker/test/integration/test_integration.py +++ b/snowplow_tracker/test/integration/test_integration.py @@ -1,7 +1,7 @@ # """ # test_integration.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import unittest @@ -38,7 +34,7 @@ default_emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=1) -get_emitter = emitters.Emitter("localhost", protocol="http", port=80, method='get') +get_emitter = emitters.Emitter("localhost", protocol="http", port=80, method="get") default_subject = subject.Subject() @@ -53,77 +49,113 @@ def from_querystring(field: str, url: str) -> Optional[str]: @all_requests def pass_response_content(url: str, request: Any) -> Dict[str, Any]: querystrings.append(request.url) - return { - "url": request.url, - "status_code": 200 - } + return {"url": request.url, "status_code": 200} @all_requests def pass_post_response_content(url: str, request: Any) -> Dict[str, Any]: querystrings.append(json.loads(request.body)) - return { - "url": request.url, - "status_code": 200 - } + return {"url": request.url, "status_code": 200} @all_requests def fail_response_content(url: str, request: Any) -> Dict[str, Any]: - return { - "url": request.url, - "status_code": 501 - } + return {"url": request.url, "status_code": 501} class IntegrationTest(unittest.TestCase): - def test_integration_page_view(self) -> None: t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): - t.track_page_view("http://savethearctic.org", "Save The Arctic", "http://referrer.com") - expected_fields = {"e": "pv", "page": "Save+The+Arctic", "url": "http%3A%2F%2Fsavethearctic.org", "refr": "http%3A%2F%2Freferrer.com"} + t.track_page_view( + "http://savethearctic.org", "Save The Arctic", "http://referrer.com" + ) + expected_fields = { + "e": "pv", + "page": "Save+The+Arctic", + "url": "http%3A%2F%2Fsavethearctic.org", + "refr": "http%3A%2F%2Freferrer.com", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) def test_integration_ecommerce_transaction_item(self) -> None: t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): - t.track_ecommerce_transaction_item("12345", "pbz0025", 7.99, 2, "black-tarot", "tarot", currency="GBP") - expected_fields = {"ti_ca": "tarot", "ti_id": "12345", "ti_qu": "2", "ti_sk": "pbz0025", "e": "ti", "ti_nm": "black-tarot", "ti_pr": "7.99", "ti_cu": "GBP"} + t.track_ecommerce_transaction_item( + "12345", "pbz0025", 7.99, 2, "black-tarot", "tarot", currency="GBP" + ) + expected_fields = { + "ti_ca": "tarot", + "ti_id": "12345", + "ti_qu": "2", + "ti_sk": "pbz0025", + "e": "ti", + "ti_nm": "black-tarot", + "ti_pr": "7.99", + "ti_cu": "GBP", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) def test_integration_ecommerce_transaction(self) -> None: t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): t.track_ecommerce_transaction( - "6a8078be", 35, city="London", currency="GBP", + "6a8078be", + 35, + city="London", + currency="GBP", items=[ - { - "sku": "pbz0026", - "price": 20, - "quantity": 1 - }, - { - "sku": "pbz0038", - "price": 15, - "quantity": 1 - }]) + {"sku": "pbz0026", "price": 20, "quantity": 1}, + {"sku": "pbz0038", "price": 15, "quantity": 1}, + ], + ) - expected_fields = {"e": "tr", "tr_id": 
"6a8078be", "tr_tt": "35", "tr_ci": "London", "tr_cu": "GBP"} + expected_fields = { + "e": "tr", + "tr_id": "6a8078be", + "tr_tt": "35", + "tr_ci": "London", + "tr_cu": "GBP", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-3]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-3]), expected_fields[key] + ) - expected_fields = {"e": "ti", "ti_id": "6a8078be", "ti_sk": "pbz0026", "ti_pr": "20", "ti_cu": "GBP"} + expected_fields = { + "e": "ti", + "ti_id": "6a8078be", + "ti_sk": "pbz0026", + "ti_pr": "20", + "ti_cu": "GBP", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-2]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-2]), expected_fields[key] + ) - expected_fields = {"e": "ti", "ti_id": "6a8078be", "ti_sk": "pbz0038", "ti_pr": "15", "ti_cu": "GBP"} + expected_fields = { + "e": "ti", + "ti_id": "6a8078be", + "ti_sk": "pbz0038", + "ti_pr": "15", + "ti_cu": "GBP", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) - self.assertEqual(from_querystring("ttm", querystrings[-3]), from_querystring("ttm", querystrings[-2])) + self.assertEqual( + from_querystring("ttm", querystrings[-3]), + from_querystring("ttm", querystrings[-2]), + ) def test_integration_mobile_screen_view(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) @@ -131,77 +163,168 @@ def test_integration_mobile_screen_view(self) -> None: t.track_mobile_screen_view("534", "Game HUD 2") expected_fields = {"e": "ue"} for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) envelope_string = from_querystring("ue_pr", querystrings[-1]) envelope = json.loads(unquote_plus(envelope_string)) - self.assertEqual(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", - "data": { - "schema": "iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0", + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", "data": { - "id": "534", - "name": "Game HUD 2" - } - } - }) + "schema": "iglu:com.snowplowanalytics.mobile/screen_view/jsonschema/1-0-0", + "data": {"id": "534", "name": "Game HUD 2"}, + }, + }, + ) def test_integration_struct_event(self) -> None: t = tracker.Tracker([get_emitter], default_subject) with HTTMock(pass_response_content): - t.track_struct_event("Ecomm", "add-to-basket", "dog-skateboarding-video", "hd", 13.99) - expected_fields = {"se_ca": "Ecomm", "se_pr": "hd", "se_la": "dog-skateboarding-video", "se_va": "13.99", "se_ac": "add-to-basket", "e": "se"} + t.track_struct_event( + "Ecomm", "add-to-basket", "dog-skateboarding-video", "hd", 13.99 + ) + expected_fields = { + "se_ca": "Ecomm", + "se_pr": "hd", + "se_la": "dog-skateboarding-video", + "se_va": "13.99", + "se_ac": "add-to-basket", + "e": "se", + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) def test_integration_self_describing_event_non_base64(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) with 
HTTMock(pass_response_content): - t.track_self_describing_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) + t.track_self_describing_event( + SelfDescribingJson( + "iglu:com.acme/viewed_product/jsonschema/2-0-2", + {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}, + ) + ) expected_fields = {"e": "ue"} for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) envelope_string = from_querystring("ue_pr", querystrings[-1]) envelope = json.loads(unquote_plus(envelope_string)) - self.assertEqual(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", - "data": {"schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", "data": {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}} - }) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data": { + "schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", + "data": { + "product_id": "ASO01043", + "price$flt": 49.95, + "walrus$tms": 1000, + }, + }, + }, + ) def test_integration_self_describing_event_base64(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=True) with HTTMock(pass_response_content): - t.track_self_describing_event(SelfDescribingJson("iglu:com.acme/viewed_product/jsonschema/2-0-2", {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000})) + t.track_self_describing_event( + SelfDescribingJson( + "iglu:com.acme/viewed_product/jsonschema/2-0-2", + {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}, + ) + ) expected_fields = {"e": "ue"} for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) envelope_string = unquote_plus(from_querystring("ue_px", querystrings[-1])) - envelope = json.loads((base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode("utf-8")) - self.assertEqual(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", - "data": {"schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", "data": {"product_id": "ASO01043", "price$flt": 49.95, "walrus$tms": 1000}} - }) + envelope = json.loads( + (base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode( + "utf-8" + ) + ) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data": { + "schema": "iglu:com.acme/viewed_product/jsonschema/2-0-2", + "data": { + "product_id": "ASO01043", + "price$flt": 49.95, + "walrus$tms": 1000, + }, + }, + }, + ) def test_integration_context_non_base64(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) with HTTMock(pass_response_content): - t.track_page_view("localhost", "local host", None, [SelfDescribingJson("iglu:com.example/user/jsonschema/2-0-3", {"user_type": "tester"})]) + t.track_page_view( + "localhost", + "local host", + None, + [ + SelfDescribingJson( + "iglu:com.example/user/jsonschema/2-0-3", + {"user_type": "tester"}, + ) + ], + ) envelope_string = from_querystring("co", querystrings[-1]) envelope = json.loads(unquote_plus(envelope_string)) - self.assertEqual(envelope, { - "schema": 
"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", - "data": [{"schema": "iglu:com.example/user/jsonschema/2-0-3", "data": {"user_type": "tester"}}] - }) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", + "data": [ + { + "schema": "iglu:com.example/user/jsonschema/2-0-3", + "data": {"user_type": "tester"}, + } + ], + }, + ) def test_integration_context_base64(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=True) with HTTMock(pass_response_content): - t.track_page_view("localhost", "local host", None, [SelfDescribingJson("iglu:com.example/user/jsonschema/2-0-3", {"user_type": "tester"})]) + t.track_page_view( + "localhost", + "local host", + None, + [ + SelfDescribingJson( + "iglu:com.example/user/jsonschema/2-0-3", + {"user_type": "tester"}, + ) + ], + ) envelope_string = unquote_plus(from_querystring("cx", querystrings[-1])) - envelope = json.loads((base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode("utf-8")) - self.assertEqual(envelope, { - "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", - "data": [{"schema": "iglu:com.example/user/jsonschema/2-0-3", "data": {"user_type": "tester"}}] - }) + envelope = json.loads( + (base64.urlsafe_b64decode(bytearray(envelope_string, "utf-8"))).decode( + "utf-8" + ) + ) + self.assertEqual( + envelope, + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", + "data": [ + { + "schema": "iglu:com.example/user/jsonschema/2-0-3", + "data": {"user_type": "tester"}, + } + ], + }, + ) def test_integration_standard_nv_pairs(self) -> None: s = subject.Subject() @@ -212,14 +335,28 @@ def test_integration_standard_nv_pairs(self) -> None: s.set_timezone("Europe London") s.set_lang("en") - t = tracker.Tracker([emitters.Emitter("localhost", method='get')], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker( + [emitters.Emitter("localhost", method="get")], + s, + "cf", + app_id="angry-birds-android", + ) with HTTMock(pass_response_content): t.track_page_view("localhost", "local host") - expected_fields = {"tna": "cf", "res": "100x200", - "lang": "en", "aid": "angry-birds-android", "cd": "24", "tz": "Europe+London", - "p": "mob", "tv": "py-" + _version.__version__} + expected_fields = { + "tna": "cf", + "res": "100x200", + "lang": "en", + "aid": "angry-birds-android", + "cd": "24", + "tz": "Europe+London", + "p": "mob", + "tv": "py-" + _version.__version__, + } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) self.assertIsNotNone(from_querystring("eid", querystrings[-1])) self.assertIsNotNone(from_querystring("dtm", querystrings[-1])) @@ -229,10 +366,17 @@ def test_integration_identification_methods(self) -> None: s.set_domain_session_id("59ed13b1a5724dae") s.set_domain_session_index(1) s.set_ip_address("255.255.255.255") - s.set_useragent("Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)") + s.set_useragent( + "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)" + ) s.set_network_user_id("fbc6c76c-bce5-43ce-8d5a-31c5") - t = tracker.Tracker([emitters.Emitter("localhost", method='get')], s, "cf", app_id="angry-birds-android") + t = tracker.Tracker( + [emitters.Emitter("localhost", method="get")], + s, + "cf", + app_id="angry-birds-android", + ) with HTTMock(pass_response_content): 
t.track_page_view("localhost", "local host") expected_fields = { @@ -241,30 +385,39 @@ def test_integration_identification_methods(self) -> None: "vid": "1", "ip": "255.255.255.255", "ua": "Mozilla%2F5.0+%28compatible%3B+MSIE+9.0%3B+Windows+NT+6.0%3B+Trident%2F5.0%29", - "tnuid": "fbc6c76c-bce5-43ce-8d5a-31c5" + "tnuid": "fbc6c76c-bce5-43ce-8d5a-31c5", } for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) def test_integration_event_subject(self) -> None: s = subject.Subject() s.set_domain_user_id("4616bfb38f872d16") s.set_lang("ES") - t = tracker.Tracker([emitters.Emitter("localhost", method='get')], s, "cf", app_id="angry-birds-android") - evSubject = subject.Subject().set_domain_user_id("1111aaa11a111a11").set_lang("EN") + t = tracker.Tracker( + [emitters.Emitter("localhost", method="get")], + s, + "cf", + app_id="angry-birds-android", + ) + evSubject = ( + subject.Subject().set_domain_user_id("1111aaa11a111a11").set_lang("EN") + ) with HTTMock(pass_response_content): t.track_page_view("localhost", "local host", event_subject=evSubject) - expected_fields = { - "duid": "1111aaa11a111a11", - "lang": "EN" - } + expected_fields = {"duid": "1111aaa11a111a11", "lang": "EN"} for key in expected_fields: - self.assertEqual(from_querystring(key, querystrings[-1]), expected_fields[key]) + self.assertEqual( + from_querystring(key, querystrings[-1]), expected_fields[key] + ) def test_integration_redis_default(self) -> None: try: import fakeredis + r = fakeredis.FakeStrictRedis() t = tracker.Tracker([redis_emitter.RedisEmitter(rdb=r)], default_subject) t.track_page_view("http://www.example.com") @@ -278,8 +431,11 @@ def test_integration_redis_default(self) -> None: def test_integration_redis_custom(self) -> None: try: import fakeredis + r = fakeredis.FakeStrictRedis() - t = tracker.Tracker([redis_emitter.RedisEmitter(rdb=r, key="custom_key")], default_subject) + t = tracker.Tracker( + [redis_emitter.RedisEmitter(rdb=r, key="custom_key")], default_subject + ) t.track_page_view("http://www.example.com") event_string = r.rpop("custom_key") event_dict = json.loads(event_string.decode("utf-8")) @@ -293,9 +449,10 @@ def test_integration_success_callback(self) -> None: callback_failure_queue = [] callback_emitter = emitters.Emitter( "localhost", - method='get', + method="get", on_success=lambda x: callback_success_queue.append(x), - on_failure=lambda x, y: callback_failure_queue.append(x)) + on_failure=lambda x, y: callback_failure_queue.append(x), + ) t = tracker.Tracker([callback_emitter], default_subject) with HTTMock(pass_response_content): t.track_page_view("http://www.example.com") @@ -313,9 +470,10 @@ def test_integration_failure_callback(self) -> None: callback_failure_queue = [] callback_emitter = emitters.Emitter( "localhost", - method='get', + method="get", on_success=lambda x: callback_success_queue.append(x), - on_failure=lambda x, y: callback_failure_queue.append(x)) + on_failure=lambda x, y: callback_failure_queue.append(x), + ) t = tracker.Tracker([callback_emitter], default_subject) with HTTMock(fail_response_content): t.track_page_view("http://www.example.com") @@ -328,12 +486,17 @@ def test_post_page_view(self) -> None: t.track_page_view("localhost", "local host", None) expected_fields = {"e": "pv", "page": "local host", "url": "localhost"} request = querystrings[-1] - self.assertEqual(request["schema"], 
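[Reviewer note, not part of the patch: a sketch of the on_success / on_failure wiring exercised by the two callback tests above; the endpoint is a placeholder. Per the emitter configuration docs, on_success receives the number of events flushed, and on_failure receives the number sent plus the unsent payloads (a list of dicts for "get", a string for "post").]

from snowplow_tracker import Tracker, Emitter

def on_success(num_sent):
    print("sent %d events" % num_sent)

def on_failure(num_sent, unsent):
    print("only %d events sent; %d still unsent" % (num_sent, len(unsent)))

e = Emitter(
    "collector.example.com",
    method="get",
    on_success=on_success,
    on_failure=on_failure,
)
t = Tracker([e])
t.track_page_view("http://www.example.com")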
"iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4") + self.assertEqual( + request["schema"], + "iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4", + ) for key in expected_fields: self.assertEqual(request["data"][0][key], expected_fields[key]) def test_post_batched(self) -> None: - default_emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=2) + default_emitter = emitters.Emitter( + "localhost", protocol="http", port=80, batch_size=2 + ) t = tracker.Tracker(default_emitter, default_subject) with HTTMock(pass_post_response_content): t.track_struct_event("Test", "A") @@ -353,65 +516,77 @@ def test_timestamps(self) -> None: expected_timestamps = [ {"dtm": "1618790401000", "ttm": None, "stm": "1618790401000"}, {"dtm": "1618790401000", "ttm": "1358933694000", "stm": "1618790401000"}, - {"dtm": "1618790401000", "ttm": "1358933694000", "stm": "1618790401000"} + {"dtm": "1618790401000", "ttm": "1358933694000", "stm": "1618790401000"}, ] request = querystrings[-1] for i, event in enumerate(expected_timestamps): - self.assertEqual(request["data"][i].get("dtm"), expected_timestamps[i]["dtm"]) - self.assertEqual(request["data"][i].get("ttm"), expected_timestamps[i]["ttm"]) - self.assertEqual(request["data"][i].get("stm"), expected_timestamps[i]["stm"]) + self.assertEqual( + request["data"][i].get("dtm"), expected_timestamps[i]["dtm"] + ) + self.assertEqual( + request["data"][i].get("ttm"), expected_timestamps[i]["ttm"] + ) + self.assertEqual( + request["data"][i].get("stm"), expected_timestamps[i]["stm"] + ) def test_bytelimit(self) -> None: - default_emitter = emitters.Emitter("localhost", protocol="http", port=80, batch_size=5, byte_limit=420) + default_emitter = emitters.Emitter( + "localhost", protocol="http", port=80, batch_size=5, byte_limit=420 + ) t = tracker.Tracker(default_emitter, default_subject) with HTTMock(pass_post_response_content): - t.track_struct_event("Test", "A") # 140 bytes - t.track_struct_event("Test", "A") # 280 bytes - t.track_struct_event("Test", "A") # 420 bytes. Send - t.track_struct_event("Test", "AA") # 141 + t.track_struct_event("Test", "A") # 140 bytes + t.track_struct_event("Test", "A") # 280 bytes + t.track_struct_event("Test", "A") # 420 bytes. 
Send + t.track_struct_event("Test", "AA") # 141 self.assertEqual(len(querystrings[-1]["data"]), 3) self.assertEqual(default_emitter.bytes_queued, 136 + len(_version.__version__)) def test_unicode_get(self) -> None: t = tracker.Tracker([get_emitter], default_subject, encode_base64=False) - unicode_a = u'\u0107' - unicode_b = u'test.\u0107om' - test_ctx = SelfDescribingJson('iglu:a.b/c/jsonschema/1-0-0', {'test': unicode_a}) + unicode_a = "\u0107" + unicode_b = "test.\u0107om" + test_ctx = SelfDescribingJson( + "iglu:a.b/c/jsonschema/1-0-0", {"test": unicode_a} + ) with HTTMock(pass_response_content): t.track_page_view(unicode_b, context=[test_ctx]) t.track_mobile_screen_view(unicode_b, context=[test_ctx]) url_string = unquote_plus(from_querystring("url", querystrings[-2])) try: - self.assertEqual(url_string.decode('utf-8'), unicode_b) + self.assertEqual(url_string.decode("utf-8"), unicode_b) except AttributeError: # in python 3: str type contains unicode (so no 'decode') self.assertEqual(url_string, unicode_b) context_string = unquote_plus(from_querystring("co", querystrings[-1])) - actual_a = json.loads(context_string)['data'][0]['data']['test'] + actual_a = json.loads(context_string)["data"][0]["data"]["test"] self.assertEqual(actual_a, unicode_a) uepr_string = unquote_plus(from_querystring("ue_pr", querystrings[-1])) - actual_b = json.loads(uepr_string)['data']['data']['id'] + actual_b = json.loads(uepr_string)["data"]["data"]["id"] self.assertEqual(actual_b, unicode_b) def test_unicode_post(self) -> None: t = tracker.Tracker([default_emitter], default_subject, encode_base64=False) - unicode_a = u'\u0107' - unicode_b = u'test.\u0107om' - test_ctx = SelfDescribingJson('iglu:a.b/c/jsonschema/1-0-0', {'test': unicode_a}) + unicode_a = "\u0107" + unicode_b = "test.\u0107om" + test_ctx = SelfDescribingJson( + "iglu:a.b/c/jsonschema/1-0-0", {"test": unicode_a} + ) with HTTMock(pass_post_response_content): t.track_page_view(unicode_b, context=[test_ctx]) t.track_mobile_screen_view(unicode_b, context=[test_ctx]) pv_event = querystrings[-2] - self.assertEqual(pv_event['data'][0]['url'], unicode_b) + self.assertEqual(pv_event["data"][0]["url"], unicode_b) - in_test_ctx = json.loads(pv_event['data'][0]['co'])['data'][0]['data']['test'] + in_test_ctx = json.loads(pv_event["data"][0]["co"])["data"][0]["data"]["test"] self.assertEqual(in_test_ctx, unicode_a) sv_event = querystrings[-1] - in_uepr_name = json.loads(sv_event['data'][0]['ue_pr'])['data']['data']['id'] + in_uepr_name = json.loads(sv_event["data"][0]["ue_pr"])["data"]["data"]["id"] self.assertEqual(in_uepr_name, unicode_b) diff --git a/snowplow_tracker/test/unit/test_contracts.py b/snowplow_tracker/test/unit/test_contracts.py index 98c4a154..9a913bca 100644 --- a/snowplow_tracker/test/unit/test_contracts.py +++ b/snowplow_tracker/test/unit/test_contracts.py @@ -1,7 +1,7 @@ # """ # test_tracker.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,19 +13,21 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import unittest -from snowplow_tracker.contracts import form_element, greater_than, non_empty, non_empty_string, one_of, satisfies +from snowplow_tracker.contracts import ( + form_element, + greater_than, + non_empty, + non_empty_string, + one_of, + satisfies, +) class TestContracts(unittest.TestCase): - def setUp(self) -> None: pass @@ -37,25 +39,25 @@ def test_greater_than_fails(self) -> None: greater_than(0, 10) def test_non_empty_succeeds(self) -> None: - non_empty(['something']) + non_empty(["something"]) def test_non_empty_fails(self) -> None: with self.assertRaises(ValueError): non_empty([]) def test_non_empty_string_succeeds(self) -> None: - non_empty_string('something') + non_empty_string("something") def test_non_empty_string_fails(self) -> None: with self.assertRaises(ValueError): - non_empty_string('') + non_empty_string("") def test_one_of_succeeds(self) -> None: - one_of('something', ['something', 'something else']) + one_of("something", ["something", "something else"]) def test_one_of_fails(self) -> None: with self.assertRaises(ValueError): - one_of('something', ['something else']) + one_of("something", ["something else"]) def test_satisfies_succeeds(self) -> None: satisfies(10, lambda v: v == 10) @@ -65,11 +67,7 @@ def test_satisfies_fails(self) -> None: satisfies(0, lambda v: v == 10) def test_form_element_no_type(self) -> None: - elem = { - "name": "elemName", - "value": "elemValue", - "nodeName": "INPUT" - } + elem = {"name": "elemName", "value": "elemValue", "nodeName": "INPUT"} form_element(elem) def test_form_element_type_valid(self) -> None: @@ -77,7 +75,7 @@ def test_form_element_type_valid(self) -> None: "name": "elemName", "value": "elemValue", "nodeName": "TEXTAREA", - "type": "button" + "type": "button", } form_element(elem) @@ -86,40 +84,27 @@ def test_form_element_type_invalid(self) -> None: "name": "elemName", "value": "elemValue", "nodeName": "SELECT", - "type": "invalid" + "type": "invalid", } with self.assertRaises(ValueError): form_element(elem) def test_form_element_nodename_invalid(self) -> None: - elem = { - "name": "elemName", - "value": "elemValue", - "nodeName": "invalid" - } + elem = {"name": "elemName", "value": "elemValue", "nodeName": "invalid"} with self.assertRaises(ValueError): form_element(elem) def test_form_element_no_nodename(self) -> None: - elem = { - "name": "elemName", - "value": "elemValue" - } + elem = {"name": "elemName", "value": "elemValue"} with self.assertRaises(ValueError): form_element(elem) def test_form_element_no_value(self) -> None: - elem = { - "name": "elemName", - "nodeName": "INPUT" - } + elem = {"name": "elemName", "nodeName": "INPUT"} with self.assertRaises(ValueError): form_element(elem) def test_form_element_no_name(self) -> None: - elem = { - "value": "elemValue", - "nodeName": "INPUT" - } + elem = {"value": "elemValue", "nodeName": "INPUT"} with self.assertRaises(ValueError): form_element(elem) diff --git a/snowplow_tracker/test/unit/test_emitters.py b/snowplow_tracker/test/unit/test_emitters.py index c52eb0b2..f02be943 100644 --- a/snowplow_tracker/test/unit/test_emitters.py +++ b/snowplow_tracker/test/unit/test_emitters.py @@ -1,7 +1,7 @@ # """ # test_emitters.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
# This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,13 +13,8 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ - import time import unittest import unittest.mock as mock @@ -46,24 +41,29 @@ def mocked_http_success(*args: Any) -> bool: def mocked_http_failure(*args: Any) -> bool: return False + def mocked_http_response_success(*args: Any) -> int: return 200 + def mocked_http_response_failure(*args: Any) -> int: return 400 + def mocked_http_response_failure_retry(*args: Any) -> int: return 500 -class TestEmitters(unittest.TestCase): +class TestEmitters(unittest.TestCase): def setUp(self) -> None: pass def test_init(self) -> None: - e = Emitter('0.0.0.0') - self.assertEqual(e.endpoint, 'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') - self.assertEqual(e.method, 'post') + e = Emitter("0.0.0.0") + self.assertEqual( + e.endpoint, "https://0.0.0.0/com.snowplowanalytics.snowplow/tp2" + ) + self.assertEqual(e.method, "post") self.assertEqual(e.batch_size, 10) self.assertEqual(e.event_store.event_buffer, []) self.assertIsNone(e.byte_limit) @@ -74,40 +74,40 @@ def test_init(self) -> None: self.assertIsNone(e.request_timeout) def test_init_batch_size(self) -> None: - e = Emitter('0.0.0.0', batch_size=10) + e = Emitter("0.0.0.0", batch_size=10) self.assertEqual(e.batch_size, 10) def test_init_post(self) -> None: - e = Emitter('0.0.0.0') + e = Emitter("0.0.0.0") self.assertEqual(e.batch_size, DEFAULT_MAX_LENGTH) def test_init_byte_limit(self) -> None: - e = Emitter('0.0.0.0', byte_limit=512) + e = Emitter("0.0.0.0", byte_limit=512) self.assertEqual(e.bytes_queued, 0) def test_init_requests_timeout(self) -> None: - e = Emitter('0.0.0.0', request_timeout=(2.5, 5)) + e = Emitter("0.0.0.0", request_timeout=(2.5, 5)) self.assertEqual(e.request_timeout, (2.5, 5)) def test_as_collector_uri(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0') - self.assertEqual(uri, 'https://0.0.0.0/com.snowplowanalytics.snowplow/tp2') + uri = Emitter.as_collector_uri("0.0.0.0") + self.assertEqual(uri, "https://0.0.0.0/com.snowplowanalytics.snowplow/tp2") def test_as_collector_uri_get(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0', method='get') - self.assertEqual(uri, 'https://0.0.0.0/i') + uri = Emitter.as_collector_uri("0.0.0.0", method="get") + self.assertEqual(uri, "https://0.0.0.0/i") def test_as_collector_uri_port(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0', port=9090) - self.assertEqual(uri, 'https://0.0.0.0:9090/com.snowplowanalytics.snowplow/tp2') + uri = Emitter.as_collector_uri("0.0.0.0", port=9090) + self.assertEqual(uri, "https://0.0.0.0:9090/com.snowplowanalytics.snowplow/tp2") def test_as_collector_uri_http(self) -> None: - uri = Emitter.as_collector_uri('0.0.0.0', protocol="http") - self.assertEqual(uri, 'http://0.0.0.0/com.snowplowanalytics.snowplow/tp2') + uri = Emitter.as_collector_uri("0.0.0.0", protocol="http") + self.assertEqual(uri, "http://0.0.0.0/com.snowplowanalytics.snowplow/tp2") def test_as_collector_uri_empty_string(self) -> None: with self.assertRaises(ValueError): - Emitter.as_collector_uri('') + Emitter.as_collector_uri("") def 
test_as_collector_uri_endpoint_protocol(self) -> None: uri = Emitter.as_collector_uri("https://0.0.0.0") @@ -116,12 +116,12 @@ def test_as_collector_uri_endpoint_protocol(self) -> None: def test_as_collector_uri_endpoint_protocol_http(self) -> None: uri = Emitter.as_collector_uri("http://0.0.0.0") self.assertEqual(uri, "http://0.0.0.0/com.snowplowanalytics.snowplow/tp2") - - @mock.patch('snowplow_tracker.Emitter.flush') + + @mock.patch("snowplow_tracker.Emitter.flush") def test_input_no_flush(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", batch_size=2) + e = Emitter("0.0.0.0", method="get", batch_size=2) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) @@ -131,11 +131,11 @@ def test_input_no_flush(self, mok_flush: Any) -> None: self.assertFalse(e.reached_limit()) mok_flush.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.flush') + @mock.patch("snowplow_tracker.Emitter.flush") def test_input_flush_byte_limit(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", batch_size=2, byte_limit=16) + e = Emitter("0.0.0.0", method="get", batch_size=2, byte_limit=16) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) @@ -144,11 +144,11 @@ def test_input_flush_byte_limit(self, mok_flush: Any) -> None: self.assertTrue(e.reached_limit()) self.assertEqual(mok_flush.call_count, 1) - @mock.patch('snowplow_tracker.Emitter.flush') + @mock.patch("snowplow_tracker.Emitter.flush") def test_input_flush_buffer(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", batch_size=2, byte_limit=1024) + e = Emitter("0.0.0.0", method="get", batch_size=2, byte_limit=1024) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) @@ -163,11 +163,11 @@ def test_input_flush_buffer(self, mok_flush: Any) -> None: self.assertTrue(e.reached_limit()) self.assertEqual(mok_flush.call_count, 1) - @mock.patch('snowplow_tracker.Emitter.flush') + @mock.patch("snowplow_tracker.Emitter.flush") def test_input_bytes_queued(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', method="get", batch_size=2, byte_limit=1024) + e = Emitter("0.0.0.0", method="get", batch_size=2, byte_limit=1024) nvPairs = {"n0": "v0", "n1": "v1"} e.input(nvPairs) @@ -177,21 +177,23 @@ def test_input_bytes_queued(self, mok_flush: Any) -> None: e.input(nvPairs) self.assertEqual(e.bytes_queued, 48) - @mock.patch('snowplow_tracker.Emitter.flush') + @mock.patch("snowplow_tracker.Emitter.flush") def test_input_bytes_post(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0') + e = Emitter("0.0.0.0") nvPairs = {"testString": "test", "testNum": 2.72} e.input(nvPairs) - self.assertEqual(e.event_store.event_buffer, [{"testString": "test", "testNum": "2.72"}]) + self.assertEqual( + e.event_store.event_buffer, [{"testString": "test", "testNum": "2.72"}] + ) - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_flush(self, mok_send_events: Any) -> None: mok_send_events.side_effect = mocked_http_response_success - e = Emitter('0.0.0.0', batch_size=2, byte_limit=None) + e = Emitter("0.0.0.0", batch_size=2, byte_limit=None) nvPairs = {"n": "v"} e.input(nvPairs) e.input(nvPairs) @@ -199,11 +201,11 @@ def test_flush(self, mok_send_events: Any) -> None: self.assertEqual(mok_send_events.call_count, 1) self.assertEqual(len(e.event_store.event_buffer), 0) - 
@mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_flush_bytes_queued(self, mok_send_events: Any) -> None: mok_send_events.side_effect = mocked_http_response_success - e = Emitter('0.0.0.0', batch_size=2, byte_limit=256) + e = Emitter("0.0.0.0", batch_size=2, byte_limit=256) nvPairs = {"n": "v"} e.input(nvPairs) e.input(nvPairs) @@ -214,7 +216,7 @@ def test_flush_bytes_queued(self, mok_send_events: Any) -> None: @freeze_time("2021-04-14 00:00:02") # unix: 1618358402000 def test_attach_sent_tstamp(self) -> None: - e = Emitter('0.0.0.0') + e = Emitter("0.0.0.0") ev_list = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.attach_sent_timestamp(ev_list) @@ -223,11 +225,11 @@ def test_attach_sent_tstamp(self) -> None: reduced = reduced and "stm" in ev.keys() and ev["stm"] == "1618358402000" self.assertTrue(reduced) - @mock.patch('snowplow_tracker.Emitter.flush') + @mock.patch("snowplow_tracker.Emitter.flush") def test_flush_timer(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - e = Emitter('0.0.0.0', batch_size=10) + e = Emitter("0.0.0.0", batch_size=10) ev_list = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] for i in ev_list: e.input(i) @@ -237,71 +239,87 @@ def test_flush_timer(self, mok_flush: Any) -> None: time.sleep(5) self.assertGreaterEqual(mok_flush.call_count, 1) - @mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_send_events_get_success(self, mok_http_get: Any) -> None: mok_http_get.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method="get", batch_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) mok_success.assert_called_once_with(evBuffer) mok_failure.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_send_events_get_failure(self, mok_http_get: Any) -> None: mok_http_get.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method="get", batch_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) mok_success.assert_not_called() mok_failure.assert_called_once_with(0, evBuffer) - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_send_events_post_success(self, mok_http_post: Any) -> None: mok_http_post.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) mok_success.assert_called_once_with(evBuffer) mok_failure.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def 
test_send_events_post_failure(self, mok_http_post: Any) -> None: mok_http_post.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) mok_success.assert_not_called() mok_failure.assert_called_with(0, evBuffer) - @mock.patch('snowplow_tracker.emitters.requests.post') + @mock.patch("snowplow_tracker.emitters.requests.post") def test_http_post_connect_timeout_error(self, mok_post_request: Any) -> None: mok_post_request.side_effect = ConnectTimeout - e = Emitter('0.0.0.0') + e = Emitter("0.0.0.0") response = e.http_post("dummy_string") post_succeeded = Emitter.is_good_status_code(response) self.assertFalse(post_succeeded) - @mock.patch('snowplow_tracker.emitters.requests.post') + @mock.patch("snowplow_tracker.emitters.requests.post") def test_http_get_connect_timeout_error(self, mok_post_request: Any) -> None: mok_post_request.side_effect = ConnectTimeout - e = Emitter('0.0.0.0', method='get') + e = Emitter("0.0.0.0", method="get") response = e.http_get({"a": "b"}) get_succeeded = Emitter.is_good_status_code(response) self.assertFalse(get_succeeded) @@ -309,11 +327,13 @@ def test_http_get_connect_timeout_error(self, mok_post_request: Any) -> None: ### # AsyncEmitter ### - @mock.patch('snowplow_tracker.AsyncEmitter.flush') + @mock.patch("snowplow_tracker.AsyncEmitter.flush") def test_async_emitter_input(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - ae = AsyncEmitter('0.0.0.0', port=9090, method="get", batch_size=3, thread_count=5) + ae = AsyncEmitter( + "0.0.0.0", port=9090, method="get", batch_size=3, thread_count=5 + ) self.assertTrue(ae.queue.empty()) ae.input({"a": "aa"}) @@ -325,11 +345,18 @@ def test_async_emitter_input(self, mok_flush: Any) -> None: ae.input({"c": "cc"}) # meet buffer size self.assertEqual(mok_flush.call_count, 1) - @mock.patch('snowplow_tracker.AsyncEmitter.send_events') + @mock.patch("snowplow_tracker.AsyncEmitter.send_events") def test_async_emitter_sync_flash(self, mok_send_events: Any) -> None: mok_send_events.side_effect = mocked_send_events - ae = AsyncEmitter('0.0.0.0', port=9090, method="get", batch_size=3, thread_count=5, byte_limit=1024) + ae = AsyncEmitter( + "0.0.0.0", + port=9090, + method="get", + batch_size=3, + thread_count=5, + byte_limit=1024, + ) self.assertTrue(ae.queue.empty()) ae.input({"a": "aa"}) @@ -343,52 +370,68 @@ def test_async_emitter_sync_flash(self, mok_send_events: Any) -> None: self.assertEqual(ae.bytes_queued, 0) self.assertEqual(mok_send_events.call_count, 1) - @mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_async_send_events_get_success(self, mok_http_get: Any) -> None: mok_http_get.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = AsyncEmitter('0.0.0.0', method="get", batch_size=10, on_success=mok_success, on_failure=mok_failure) + ae = AsyncEmitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) mok_success.assert_called_once_with(evBuffer) 
mok_failure.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_async_send_events_get_failure(self, mok_http_get: Any) -> None: mok_http_get.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = AsyncEmitter('0.0.0.0', method="get", batch_size=10, on_success=mok_success, on_failure=mok_failure) + ae = AsyncEmitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) mok_success.assert_not_called() mok_failure.assert_called_once_with(0, evBuffer) - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_async_send_events_post_success(self, mok_http_post: Any) -> None: mok_http_post.side_effect = mocked_http_response_success mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) + ae = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) mok_success.assert_called_once_with(evBuffer) mok_failure.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_async_send_events_post_failure(self, mok_http_post: Any) -> None: mok_http_post.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - ae = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) + ae = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] ae.send_events(evBuffer) @@ -396,115 +439,147 @@ def test_async_send_events_post_failure(self, mok_http_post: Any) -> None: mok_failure.assert_called_with(0, evBuffer) # Unicode - @mock.patch('snowplow_tracker.AsyncEmitter.flush') + @mock.patch("snowplow_tracker.AsyncEmitter.flush") def test_input_unicode_get(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - payload = {"unicode": u'\u0107', "alsoAscii": "abc"} - ae = AsyncEmitter('0.0.0.0', method="get", batch_size=2) + payload = {"unicode": "\u0107", "alsoAscii": "abc"} + ae = AsyncEmitter("0.0.0.0", method="get", batch_size=2) ae.input(payload) self.assertEqual(len(ae.event_store.event_buffer), 1) self.assertDictEqual(payload, ae.event_store.event_buffer[0]) - @mock.patch('snowplow_tracker.AsyncEmitter.flush') + @mock.patch("snowplow_tracker.AsyncEmitter.flush") def test_input_unicode_post(self, mok_flush: Any) -> None: mok_flush.side_effect = mocked_flush - payload = {"unicode": u'\u0107', "alsoAscii": "abc"} - ae = AsyncEmitter('0.0.0.0', batch_size=2) + payload = {"unicode": "\u0107", "alsoAscii": "abc"} + ae = AsyncEmitter("0.0.0.0", batch_size=2) ae.input(payload) self.assertEqual(len(ae.event_store.event_buffer), 1) self.assertDictEqual(payload, ae.event_store.event_buffer[0]) - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_send_events_post_retry(self, mok_http_post: Any) -> None: mok_http_post.side_effect = mocked_http_response_failure_retry mok_success = 
mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", batch_size=10, on_success=mok_success, on_failure=mok_failure + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) - + mok_http_post.side_effect = mocked_http_response_success time.sleep(5) mok_failure.assert_called_with(0, evBuffer) mok_success.assert_called_with(evBuffer) - @mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_send_events_get_retry(self, mok_http_get: Any) -> None: mok_http_get.side_effect = mocked_http_response_failure_retry mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method='get', batch_size=1, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", + method="get", + batch_size=1, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) - + mok_http_get.side_effect = mocked_http_response_success time.sleep(5) mok_failure.assert_called_with(0, evBuffer) mok_success.assert_called_with(evBuffer) - @mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_send_events_get_no_retry(self, mok_http_get: Any) -> None: mok_http_get.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method='get', batch_size=1, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", + method="get", + batch_size=1, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) - + mok_failure.assert_called_once_with(0, evBuffer) mok_success.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_send_events_post_no_retry(self, mok_http_post: Any) -> None: mok_http_post.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method='get', batch_size=1, on_success=mok_success, on_failure=mok_failure) + e = Emitter( + "0.0.0.0", + method="get", + batch_size=1, + on_success=mok_success, + on_failure=mok_failure, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) - + mok_failure.assert_called_once_with(0, evBuffer) mok_success.assert_not_called() - @mock.patch('snowplow_tracker.Emitter.http_post') + @mock.patch("snowplow_tracker.Emitter.http_post") def test_send_events_post_custom_retry(self, mok_http_post: Any) -> None: mok_http_post.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', batch_size=10, on_success=mok_success, on_failure=mok_failure, custom_retry_codes={400: True}) + e = Emitter( + "0.0.0.0", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + custom_retry_codes={400: True}, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) - + mok_http_post.side_effect = mocked_http_response_success time.sleep(5) mok_failure.assert_called_with(0, evBuffer) 
mok_success.assert_called_with(evBuffer) - @mock.patch('snowplow_tracker.Emitter.http_get') + @mock.patch("snowplow_tracker.Emitter.http_get") def test_send_events_get_custom_retry(self, mok_http_get: Any) -> None: mok_http_get.side_effect = mocked_http_response_failure mok_success = mock.Mock(return_value="success mocked") mok_failure = mock.Mock(return_value="failure mocked") - e = Emitter('0.0.0.0', method='get',batch_size=10, on_success=mok_success, on_failure=mok_failure, custom_retry_codes={400: True}) + e = Emitter( + "0.0.0.0", + method="get", + batch_size=10, + on_success=mok_success, + on_failure=mok_failure, + custom_retry_codes={400: True}, + ) evBuffer = [{"a": "aa"}, {"b": "bb"}, {"c": "cc"}] e.send_events(evBuffer) - + mok_http_get.side_effect = mocked_http_response_success time.sleep(5) mok_failure.assert_called_with(0, evBuffer) mok_success.assert_called_with(evBuffer) - diff --git a/snowplow_tracker/test/unit/test_in_memory_event_store.py b/snowplow_tracker/test/unit/test_in_memory_event_store.py index 90c9e887..93a0c8b2 100644 --- a/snowplow_tracker/test/unit/test_in_memory_event_store.py +++ b/snowplow_tracker/test/unit/test_in_memory_event_store.py @@ -1,7 +1,7 @@ # """ # test_in_memory_event_store.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import unittest diff --git a/snowplow_tracker/test/unit/test_payload.py b/snowplow_tracker/test/unit/test_payload.py index 6d89caa1..c174e8f4 100644 --- a/snowplow_tracker/test/unit/test_payload.py +++ b/snowplow_tracker/test/unit/test_payload.py @@ -1,7 +1,7 @@ # """ # test_payload.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import json @@ -55,7 +51,6 @@ def date_encoder(o: Any) -> str: class TestPayload(unittest.TestCase): - def setUp(self) -> None: pass @@ -64,7 +59,12 @@ def test_object_generation(self) -> None: self.assertDictEqual({}, p.nv_pairs) def test_object_generation_2(self) -> None: - p = payload.Payload({"test1": "result1", "test2": "result2", }) + p = payload.Payload( + { + "test1": "result1", + "test2": "result2", + } + ) output = {"test1": "result1", "test2": "result2"} self.assertDictEqual(output, p.nv_pairs) @@ -72,7 +72,10 @@ def test_add(self) -> None: p = payload.Payload() p.add("name1", "value1") p.add("name2", "value2") - output = {"name1": "value1", "name2": "value2", } + output = { + "name1": "value1", + "name2": "value2", + } self.assertDictEqual(output, p.nv_pairs) def test_add_empty_val(self) -> None: @@ -88,51 +91,58 @@ def test_add_none(self) -> None: self.assertDictEqual(output, p.nv_pairs) def test_add_dict(self) -> None: - p = payload.Payload({"n1": "v1", "n2": "v2", }) - p.add_dict({"name4": 4, "name3": 3}) # Order doesn't matter + p = payload.Payload( + { + "n1": "v1", + "n2": "v2", + } + ) + p.add_dict({"name4": 4, "name3": 3}) # Order doesn't matter output = {"n1": "v1", "n2": "v2", "name3": 3, "name4": 4} self.assertDictEqual(output, p.nv_pairs) def test_add_json_empty(self) -> None: - p = payload.Payload({'name': 'value'}) + p = payload.Payload({"name": "value"}) input = {} - p.add_json(input, False, 'ue_px', 'ue_pr') - output = {'name': 'value'} + p.add_json(input, False, "ue_px", "ue_pr") + output = {"name": "value"} self.assertDictEqual(output, p.nv_pairs) def test_add_json_none(self) -> None: - p = payload.Payload({'name': 'value'}) + p = payload.Payload({"name": "value"}) input = None - p.add_json(input, False, 'ue_px', 'ue_pr') - output = {'name': 'value'} + p.add_json(input, False, "ue_px", "ue_pr") + output = {"name": "value"} self.assertDictEqual(output, p.nv_pairs) def test_add_json_encode_false(self) -> None: p = payload.Payload() - input = {'a': 1} - p.add_json(input, False, 'ue_px', 'ue_pr') - self.assertTrue('ue_pr' in p.nv_pairs.keys()) - self.assertFalse('ue_px' in p.nv_pairs.keys()) + input = {"a": 1} + p.add_json(input, False, "ue_px", "ue_pr") + self.assertTrue("ue_pr" in p.nv_pairs.keys()) + self.assertFalse("ue_px" in p.nv_pairs.keys()) def test_add_json_encode_true(self) -> None: p = payload.Payload() - input = {'a': 1} - p.add_json(input, True, 'ue_px', 'ue_pr') - self.assertFalse('ue_pr' in p.nv_pairs.keys()) - self.assertTrue('ue_px' in p.nv_pairs.keys()) + input = {"a": 1} + p.add_json(input, True, "ue_px", "ue_pr") + self.assertFalse("ue_pr" in p.nv_pairs.keys()) + self.assertTrue("ue_px" in p.nv_pairs.keys()) def test_add_json_unicode_encode_false(self) -> None: p = payload.Payload() - input = {'a': u'\u0107', u'\u0107': 'b'} - p.add_json(input, False, 'ue_px', 'ue_pr') + input = {"a": "\u0107", "\u0107": "b"} + p.add_json(input, False, "ue_px", "ue_pr") ue_pr = json.loads(p.nv_pairs["ue_pr"]) self.assertDictEqual(input, ue_pr) def test_add_json_unicode_encode_true(self) -> None: p = payload.Payload() - input = {'a': '\u0107', '\u0107': 'b'} - p.add_json(input, True, 'ue_px', 'ue_pr') - ue_px = json.loads(base64.urlsafe_b64decode(p.nv_pairs["ue_px"]).decode('utf-8')) + input = {"a": "\u0107", "\u0107": "b"} + p.add_json(input, True, "ue_px", "ue_pr") + ue_px = json.loads( + 
base64.urlsafe_b64decode(p.nv_pairs["ue_px"]).decode("utf-8") + ) self.assertDictEqual(input, ue_px) def test_add_json_with_custom_enc(self) -> None: @@ -148,5 +158,5 @@ def test_add_json_with_custom_enc(self) -> None: self.assertTrue(is_subset({"key1": "2020-02-01"}, results)) def test_subject_get(self) -> None: - p = payload.Payload({'name1': 'val1'}) + p = payload.Payload({"name1": "val1"}) self.assertDictEqual(p.get(), p.nv_pairs) diff --git a/snowplow_tracker/test/unit/test_subject.py b/snowplow_tracker/test/unit/test_subject.py index 93e2b278..7645781a 100644 --- a/snowplow_tracker/test/unit/test_subject.py +++ b/snowplow_tracker/test/unit/test_subject.py @@ -1,7 +1,7 @@ # """ # test_subject.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import unittest @@ -26,7 +22,6 @@ class TestSubject(unittest.TestCase): - def setUp(self) -> None: pass @@ -61,18 +56,14 @@ def test_subject_0(self) -> None: "duid": "domain-user-id", "sid": "domain-session-id", "vid": 1, - "tnuid": "network-user-id" + "tnuid": "network-user-id", } self.assertDictEqual(s.standard_nv_pairs, exp) def test_subject_1(self) -> None: s = _subject.Subject().set_platform("srv").set_user_id("1234").set_lang("EN") - exp = { - "p": "srv", - "uid": "1234", - "lang": "EN" - } + exp = {"p": "srv", "uid": "1234", "lang": "EN"} self.assertDictEqual(s.standard_nv_pairs, exp) with pytest.raises(KeyError): diff --git a/snowplow_tracker/test/unit/test_tracker.py b/snowplow_tracker/test/unit/test_tracker.py index 91a2eff5..9db98395 100644 --- a/snowplow_tracker/test/unit/test_tracker.py +++ b/snowplow_tracker/test/unit/test_tracker.py @@ -1,7 +1,7 @@ # """ # test_tracker.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import re diff --git a/snowplow_tracker/tracker.py b/snowplow_tracker/tracker.py index 7791b103..4f9d9047 100644 --- a/snowplow_tracker/tracker.py +++ b/snowplow_tracker/tracker.py @@ -1,7 +1,7 @@ # """ # tracker.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. 
# This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ import time diff --git a/snowplow_tracker/tracker_configuration.py b/snowplow_tracker/tracker_configuration.py index 9c717721..af2a4b9d 100644 --- a/snowplow_tracker/tracker_configuration.py +++ b/snowplow_tracker/tracker_configuration.py @@ -1,7 +1,7 @@ # """ # tracker_configuration.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. - -# Authors: Jack Keene, Anuj More, Alex Dean, Fred Blundun, Paul Boocock -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ from typing import Optional diff --git a/snowplow_tracker/typing.py b/snowplow_tracker/typing.py index db27a671..52b68664 100644 --- a/snowplow_tracker/typing.py +++ b/snowplow_tracker/typing.py @@ -1,7 +1,7 @@ # """ # typing.py -# Copyright (c) 2013-2022 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2023 Snowplow Analytics Ltd. All rights reserved. # This program is licensed to you under the Apache License Version 2.0, # and you may not use this file except in compliance with the Apache License @@ -13,10 +13,6 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the Apache License Version 2.0 for the specific # language governing permissions and limitations there under. 
- -# Authors: Anuj More, Alex Dean, Fred Blundun, Paul Boocock, Matus Tomlein -# Copyright: Copyright (c) 2013-2022 Snowplow Analytics Ltd -# License: Apache License Version 2.0 # """ from typing import Dict, List, Callable, Any, Optional, Union, Tuple From 65539bd80f57a77335fc61890c70e0e7ed7a1c0d Mon Sep 17 00:00:00 2001 From: Jack-Keene Date: Mon, 23 Jan 2023 16:51:44 +0000 Subject: [PATCH 11/11] Prepare for release 0.13.0 --- CHANGES.txt | 11 +++++++++++ docs/source/conf.py | 2 +- setup.py | 2 +- snowplow_tracker/_version.py | 2 +- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 06ba1ae9..d6e2f6be 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,14 @@ +Version 0.13.0 (2023-01-24) +--------------------------- +Adds Snowplow Interface (#295) +Adds retry for failed events (#296) +Adds customisable retry codes (#297) +Adds EventStore with max limit (#309) +Adds Snowplow Example App (#302) +Fix Collector URL with trailing '/' (#300) +Rename unstruct_event to self_describing_event (#298) +Upgrade `set-output` in cd (#294) + Version 0.12.0 (2022-11-03) --------------------------- Adds Domain Session ID and Domain Session Index to Subject class (#282) (Thanks to @cpnat) diff --git a/docs/source/conf.py b/docs/source/conf.py index f18cc0eb..9edb36f8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,7 @@ author = 'Alex Dean, Paul Boocock, Matus Tomlein, Jack Keene' # The full version, including alpha/beta/rc tags -release = '0.12' +release = "0.13" # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index e12f7835..2ae76007 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ setup( name="snowplow-tracker", - version="0.12.0", + version="0.13.0", author=authors_str, author_email=authors_email_str, packages=[ diff --git a/snowplow_tracker/_version.py b/snowplow_tracker/_version.py index aa7e4852..aacd565c 100644 --- a/snowplow_tracker/_version.py +++ b/snowplow_tracker/_version.py @@ -15,6 +15,6 @@ # language governing permissions and limitations there under. # """ -__version_info__ = (0, 12, 0) +__version_info__ = (0, 13, 0) __version__ = ".".join(str(x) for x in __version_info__) __build_version__ = __version__ + ""
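
Usage sketch for the retry-related entries in the CHANGES list above ("Adds retry for failed events (#296)", "Adds customisable retry codes (#297)"): the emitter unit tests in this patch exercise these through the custom_retry_codes, on_success, and on_failure arguments of Emitter. The snippet below is a minimal sketch assembled from those test patterns, not code taken from the patch itself; the collector host collector.example.com is a placeholder and the parameter values are illustrative rather than recommended settings.

    from snowplow_tracker import Tracker, Emitter, Subject

    def on_success(events):
        # Per the unit tests, the success callback receives the batch of sent events
        print(f"sent {len(events)} events")

    def on_failure(sent_count, unsent_events):
        # Receives the count of events sent plus the payloads that were not sent
        print(f"sent {sent_count}, {len(unsent_events)} still queued")

    emitter = Emitter(
        "collector.example.com",         # placeholder collector host
        method="post",
        batch_size=10,                   # flush once 10 events are queued...
        byte_limit=51200,                # ...or once ~50 KB of payload is queued
        request_timeout=(2.5, 5.0),      # (connect, read) timeouts in seconds
        on_success=on_success,
        on_failure=on_failure,
        custom_retry_codes={400: True},  # force retry on HTTP 400, mirroring the tests
    )

    tracker = Tracker([emitter], Subject().set_platform("srv"), encode_base64=False)
    tracker.track_page_view("https://example.com")

Retries for retryable status codes happen after a delay rather than immediately, which is why the retry tests in this patch sleep for a few seconds before asserting that on_success is eventually called with the previously failed batch.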