diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f512284..3cf65e46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- Added logging. + ## [1.0.0-alpha.1] - 2023-10-20 diff --git a/Makefile b/Makefile index cae6eb3d..48c611fd 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ dev: # Run this with the production gunicorn WSGI Server prod: - gunicorn -w 1 -b 0.0.0.0:80 --access-logfile gunicornlog.txt -k eventlet node_monitor.__main__:app + gunicorn -w 1 -b 0.0.0.0:80 --access-logfile=logs/gunicorn_access.log --error-logfile=logs/gunicorn_error.log -k eventlet node_monitor.__main__:app # We're forced to run this with only one worker because we're instantiating # node_monitor in a thread from inside the app. If we had multiple workers, # we would have multiple instances of node_monitor running, each sending diff --git a/README.md b/README.md index a894bcb1..0277b0c3 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,14 @@ $ make prod For more control over testing, see `tests/conftest.py`, and/or `Makefile`. +## Logging + +Node Monitor writes all logs to the `logs/` directory. +- `logs/gunicorn_access.log` contains all HTTP requests. +- `logs/gunicorn_error.log` contains all gunicorn errors and information. +- `logs/node_monitor.log` contains all `Node Monitor` specific logs. + + ## Notes diff --git a/node_monitor/__main__.py b/node_monitor/__main__.py index f74e4f0f..12cdb5b0 100644 --- a/node_monitor/__main__.py +++ b/node_monitor/__main__.py @@ -1,4 +1,5 @@ import threading +import logging from node_monitor.bot_email import EmailBot from node_monitor.bot_slack import SlackBot @@ -9,7 +10,16 @@ import node_monitor.load_config as c -## Initialize +## Calling `basicConfig` will globally overwrite the root logging +## configuration, so changes propagate to all loggers in the application. +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s: %(message)s', + datefmt='[%Y-%m-%d %H:%M:%S %z]', # Uses the same format as Gunicorn + filename='logs/node_monitor.log', +) + +## Initialize Node Monitor ## Objects are passed by reference, so we can pass around the NodeMonitor ## instance and work on the same data in different functions/threads email_bot = EmailBot(c.EMAIL_USERNAME, c.EMAIL_PASSWORD) @@ -25,10 +35,10 @@ ## can we call nm.mainloop as the target without creating a new fn? def start_node_monitor() -> None: nm.mainloop() -print("Starting NodeMonitor...", end=" ") +logging.info("Starting NodeMonitor...") thread = threading.Thread(target=start_node_monitor, daemon=True) thread.start() -print("Running.") +logging.info("NodeMonitor is running.") ## Run Flask server in main thread app = create_server(nm, thread.is_alive) diff --git a/node_monitor/load_config.py b/node_monitor/load_config.py index a56c3c95..1fdbadf8 100644 --- a/node_monitor/load_config.py +++ b/node_monitor/load_config.py @@ -7,16 +7,17 @@ ############################################## ## Secrets -EMAIL_USERNAME = os.environ.get('EMAIL_USERNAME', '') -EMAIL_PASSWORD = os.environ.get('EMAIL_PASSWORD', '') -TOKEN_DISCORD = os.environ.get('TOKEN_DISCORD', '') # Not implemented -TOKEN_SLACK = os.environ.get('TOKEN_SLACK', '') -TOKEN_TELEGRAM = os.environ.get('TOKEN_TELEGRAM', '') -DB_HOST = os.environ.get('DB_HOST', '') -DB_USERNAME = os.environ.get('DB_USERNAME', '') -DB_PASSWORD = os.environ.get('DB_PASSWORD', '') -DB_NAME = os.environ.get('DB_NAME', '') -DB_PORT = os.environ.get('DB_PORT', '') +EMAIL_USERNAME = os.environ.get('EMAIL_USERNAME', '') +EMAIL_PASSWORD = os.environ.get('EMAIL_PASSWORD', '') +TOKEN_DISCORD = os.environ.get('TOKEN_DISCORD', '') # Not implemented +TOKEN_SLACK = os.environ.get('TOKEN_SLACK', '') +TOKEN_TELEGRAM = os.environ.get('TOKEN_TELEGRAM', '') +DB_HOST = os.environ.get('DB_HOST', '') +DB_USERNAME = os.environ.get('DB_USERNAME', '') +DB_PASSWORD = os.environ.get('DB_PASSWORD', '') +DB_NAME = os.environ.get('DB_NAME', '') +DB_PORT = os.environ.get('DB_PORT', '') +FEEDBACK_FORM_URL = os.environ.get('FEEDBACK_FORM_URL', '') ## Pre-flight check diff --git a/node_monitor/node_monitor.py b/node_monitor/node_monitor.py index 24a3d9a8..94f6dc8b 100644 --- a/node_monitor/node_monitor.py +++ b/node_monitor/node_monitor.py @@ -3,6 +3,7 @@ from typing import Deque, List, Dict, Optional from toolz import groupby # type: ignore import schedule +import logging import node_monitor.ic_api as ic_api from node_monitor.bot_email import EmailBot @@ -74,6 +75,7 @@ def _resync(self, override_data: ic_api.Nodes | None = None) -> None: override_data: If provided, this arg will be used instead of live fetching Nodes from the ic-api. Useful for testing. """ + logging.info("Resyncing node states from ic-api...") data = override_data if override_data else ic_api.get_nodes() self.snapshots.append(data) self.last_update = time.time() @@ -111,15 +113,22 @@ def broadcast_alerts(self) -> None: # - - - - - - - - - - - - - - - - - if preferences['notify_email'] == True: recipients = email_recipients[node_provider_id] + logging.info(f"Sending alert email message to {recipients}...") self.email_bot.send_emails(recipients, subject, message) if preferences['notify_slack'] == True: if self.slack_bot is not None: channel_name = channels[node_provider_id]['slack_channel_name'] - self.slack_bot.send_message(channel_name, message) + logging.info(f"Sending alert slack message to {channel_name}...") + err1 = self.slack_bot.send_message(channel_name, message) + if err1 is not None: + logging.error(f"SlackBot.send_message() failed with error: {err1}") if preferences['notify_telegram_chat'] == True: if self.telegram_bot is not None: chat_id = channels[node_provider_id]['telegram_chat_id'] - self.telegram_bot.send_message(chat_id, message) + logging.info(f"Sending alert telegram message to {chat_id}...") + err2 = self.telegram_bot.send_message(chat_id, message) + if err2 is not None: + logging.error(f"TelegramBot.send_message() failed with error: {err2}") # - - - - - - - - - - - - - - - - - @@ -141,20 +150,28 @@ def broadcast_status_report(self) -> None: if k in subscribers.keys()} # - - - - - - - - - - - - - - - - - for node_provider_id, nodes in reportable_nodes.items(): + logging.info(f"Broadcasting status report {node_provider_id}...") preferences = subscribers[node_provider_id] subject, message = messages.nodes_status_message(nodes, node_labels) # - - - - - - - - - - - - - - - - - if preferences['notify_email'] == True: recipients = email_recipients[node_provider_id] + logging.info(f"Sending status report email to {recipients}...") self.email_bot.send_emails(recipients, subject, message) if preferences['notify_slack'] == True: if self.slack_bot is not None: channel_name = channels[node_provider_id]['slack_channel_name'] - self.slack_bot.send_message(channel_name, message) + logging.info(f"Sending status report slack message to {channel_name}...") + err1 = self.slack_bot.send_message(channel_name, message) + if err1 is not None: + logging.error(f"SlackBot.send_message() failed with error: {err1}") if preferences['notify_telegram_chat'] == True: if self.telegram_bot is not None: chat_id = channels[node_provider_id]['telegram_chat_id'] - self.telegram_bot.send_message(chat_id, message) + logging.info(f"Sending status report telegram message to {chat_id}...") + err2 = self.telegram_bot.send_message(chat_id, message) + if err2 is not None: + logging.error(f"TelegramBot.send_message() failed with error: {err2}") # - - - - - - - - - - - - - - - - - @@ -167,7 +184,7 @@ def step(self) -> None: self._analyze() self.broadcast_alerts() except Exception as e: - print(f"NodeMonitor.step() failed with error: {e}") + logging.error(f"NodeMonitor.step() failed with error: {e}") def mainloop(self) -> None: diff --git a/node_monitor/node_monitor_helpers/messages.py b/node_monitor/node_monitor_helpers/messages.py index dd55a68d..35dfe5c1 100644 --- a/node_monitor/node_monitor_helpers/messages.py +++ b/node_monitor/node_monitor_helpers/messages.py @@ -2,10 +2,17 @@ from typing import List, Dict, Tuple import node_monitor.ic_api as ic_api +import node_monitor.load_config as c # Forgive me Lord Guido, for I have broken PEP8. Principal = str +def datetime_iso8601() -> str: + """Returns the current time in ISO 8601 format, excluding milliseconds. + Example: 2021-05-01T00:00:00. + """ + return datetime.utcnow().isoformat(timespec='seconds') + def detailnode(node: ic_api.Node, label: str) -> str: """Returns: @@ -73,7 +80,7 @@ def _make_subject() -> str: f"{formatted_nodes_down}\n" f"\n" f"Node Monitor by Aviate Labs\n" - f"Report Generated: {datetime.utcnow().isoformat()} UTC\n" + f"Report Generated: {datetime_iso8601()} UTC\n" f"Help us serve you better! Provide your feedback!\n") return (subject, message) @@ -96,7 +103,7 @@ def _make_diagnostic_message() -> str: case 0: return "" case _: return (f"🛑 Node(s) Compromised:\n" f"\n" - f"{detailnodes(nodes_down, labels)}\n") + f"{detailnodes(nodes_down, labels)}\n\n") def _make_subject() -> str: datacenters = {node.dc_id.upper() for node in nodes_down} match len(nodes_down): @@ -109,10 +116,8 @@ def _render_frac(numerator: int, denominator: int) -> str: # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - subject = _make_subject() message = ( - f"{_make_diagnostic_message()}\n" - f"\n" + f"{_make_diagnostic_message()}" f"🔎 Node Status Breakdown:\n" - f"Total Nodes: { total_nodes }\n" f"Nodes Up: { _render_frac(len(nodes_up ), total_nodes) }\n" f"Nodes Down: { _render_frac(len(nodes_down), total_nodes) }\n" f"Nodes Unassigned: { _render_frac(len(nodes_unassigned), total_nodes) }\n" @@ -124,6 +129,6 @@ def _render_frac(numerator: int, denominator: int) -> str: f"\n" f"Thanks for reviewing today's report. We'll be back tomorrow!\n" f"Node Monitor by Aviate Labs.\n" - f"Report generated: {datetime.utcnow().isoformat()} UTC\n" + f"Report generated: {datetime_iso8601()} UTC\n" f"Help us serve you better! Provide your feedback!\n") return (subject, message) diff --git a/tests/conftest.py b/tests/conftest.py index 013c9b72..f88566a2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,6 +9,7 @@ ## --db is a custom flag to test CRUD operations on the database ## example: pytest -s --send_emails tests/test_bot_email.py ## example: pytest -s --send_slack tests/test_bot_slack.py +## example: pytest -s --send_telegram tests/test_bot_telegram.py ## example: pytest -s --db tests/test_node_provider_db.py diff --git a/tests/test_bot_email.py b/tests/test_bot_email.py index 3ac76491..1c484132 100644 --- a/tests/test_bot_email.py +++ b/tests/test_bot_email.py @@ -59,16 +59,21 @@ def test_send_emails_network(): ## Init the authenticated email bot instance email_bot = EmailBot(c.EMAIL_USERNAME, c.EMAIL_PASSWORD) - ## Set paramaters (uses an anonymous email inbox for testing) - ## Send out nodes_down_message and nodes_status_message. We test both. - ## We append time to the subject to act as an identifier for the test. + ## Set recipient (we use an anonymous email inbox for testing) recipients = ['nodemonitortest@mailnesia.com'] - subject1, message1 = messages.nodes_down_message([fakenode], fakelabel) - subject1 = str(f'{time.time()} - {subject1}') - email_bot.send_emails(recipients, subject1, message1) - subject2, message2 = messages.nodes_status_message([fakenode], fakelabel) + ## Create the messages. We use unittest.mock.patch to remove the private URL. + with patch.object(c, 'FEEDBACK_FORM_URL', 'https://url-has-been-redacted.ninja'): + subject1, message1 = messages.nodes_down_message([fakenode], fakelabel) + subject2, message2 = messages.nodes_status_message([fakenode], fakelabel) + + ## Append the time to the subject to act as an identifier for the test, + ## making it easy to do a regex search to validate the email. + subject1 = str(f'{time.time()} - {subject1}') subject2 = str(f'{time.time()} - {subject2}') + + ## Send both nodes_down_message and nodes_status_message as emails. + email_bot.send_emails(recipients, subject1, message1) email_bot.send_emails(recipients, subject2, message2) ## Automatically check the email inbox diff --git a/tests/test_bot_slack.py b/tests/test_bot_slack.py index 11529657..2ac64f73 100644 --- a/tests/test_bot_slack.py +++ b/tests/test_bot_slack.py @@ -1,8 +1,8 @@ import pytest from unittest.mock import patch -import node_monitor.load_config as c from node_monitor.bot_slack import SlackBot +import node_monitor.load_config as c @patch("slack_sdk.WebClient") @@ -24,13 +24,12 @@ def test_send_message(mock_web_client): def test_send_message_slack(): """Send a real test message to a Slack workspace""" slack_bot = SlackBot(c.TOKEN_SLACK) - slack_channel_name = "node-monitor" - message = "🔬 Hello from test_send_message_slack()" + message = "🔬 This is a test message from Node Monitor" - # SlackBot.send_message() returns an error without raising an exception - # to prevent NodeMonitor from crashing if the message fails to send. - # Instead, we raise it here. + ## SlackBot.send_message() normally returns an error without raising + ## an exception to prevent NodeMonitor from crashing if the message + ## fails to send. We make sure to raise it here to purposely fail the test. err = slack_bot.send_message(slack_channel_name, message) if err is not None: raise err diff --git a/tests/test_bot_telegram.py b/tests/test_bot_telegram.py index 96216af5..3f7aae6c 100644 --- a/tests/test_bot_telegram.py +++ b/tests/test_bot_telegram.py @@ -25,8 +25,9 @@ def test_send_message(mock_get): def test_send_live_message(): telegram_bot = TelegramBot(c.TOKEN_TELEGRAM) chat_id = "-1001925583150" - message = "Test message" + message = "🔬 This is a test message from Node Monitor" err = telegram_bot.send_message(chat_id, message) if err is not None: raise err +