Skip to content

Commit

Permalink
Merge branch 'main' into release
Browse files Browse the repository at this point in the history
  • Loading branch information
mourginakis committed Oct 26, 2023
2 parents 2aa65f5 + dce7f9b commit 67a461a
Show file tree
Hide file tree
Showing 11 changed files with 88 additions and 39 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
<!-- ## [1.0.0-alpha.2] - Unreleased -->

- Added logging.


## [1.0.0-alpha.1] - 2023-10-20

Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ dev:

# Run this with the production gunicorn WSGI Server
prod:
gunicorn -w 1 -b 0.0.0.0:80 --access-logfile gunicornlog.txt -k eventlet node_monitor.__main__:app
gunicorn -w 1 -b 0.0.0.0:80 --access-logfile=logs/gunicorn_access.log --error-logfile=logs/gunicorn_error.log -k eventlet node_monitor.__main__:app
# We're forced to run this with only one worker because we're instantiating
# node_monitor in a thread from inside the app. If we had multiple workers,
# we would have multiple instances of node_monitor running, each sending
Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ $ make prod
For more control over testing, see `tests/conftest.py`, and/or `Makefile`.


## Logging

Node Monitor writes all logs to the `logs/` directory.
- `logs/gunicorn_access.log` contains all HTTP requests.
- `logs/gunicorn_error.log` contains all gunicorn errors and information.
- `logs/node_monitor.log` contains all `Node Monitor` specific logs.



## Notes

Expand Down
16 changes: 13 additions & 3 deletions node_monitor/__main__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import threading
import logging

from node_monitor.bot_email import EmailBot
from node_monitor.bot_slack import SlackBot
Expand All @@ -9,7 +10,16 @@
import node_monitor.load_config as c


## Initialize
## Calling `basicConfig` will globally overwrite the root logging
## configuration, so changes propagate to all loggers in the application.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s: %(message)s',
datefmt='[%Y-%m-%d %H:%M:%S %z]', # Uses the same format as Gunicorn
filename='logs/node_monitor.log',
)

## Initialize Node Monitor
## Objects are passed by reference, so we can pass around the NodeMonitor
## instance and work on the same data in different functions/threads
email_bot = EmailBot(c.EMAIL_USERNAME, c.EMAIL_PASSWORD)
Expand All @@ -25,10 +35,10 @@
## can we call nm.mainloop as the target without creating a new fn?
def start_node_monitor() -> None:
nm.mainloop()
print("Starting NodeMonitor...", end=" ")
logging.info("Starting NodeMonitor...")
thread = threading.Thread(target=start_node_monitor, daemon=True)
thread.start()
print("Running.")
logging.info("NodeMonitor is running.")

## Run Flask server in main thread
app = create_server(nm, thread.is_alive)
Expand Down
21 changes: 11 additions & 10 deletions node_monitor/load_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@
##############################################
## Secrets

EMAIL_USERNAME = os.environ.get('EMAIL_USERNAME', '')
EMAIL_PASSWORD = os.environ.get('EMAIL_PASSWORD', '')
TOKEN_DISCORD = os.environ.get('TOKEN_DISCORD', '') # Not implemented
TOKEN_SLACK = os.environ.get('TOKEN_SLACK', '')
TOKEN_TELEGRAM = os.environ.get('TOKEN_TELEGRAM', '')
DB_HOST = os.environ.get('DB_HOST', '')
DB_USERNAME = os.environ.get('DB_USERNAME', '')
DB_PASSWORD = os.environ.get('DB_PASSWORD', '')
DB_NAME = os.environ.get('DB_NAME', '')
DB_PORT = os.environ.get('DB_PORT', '')
EMAIL_USERNAME = os.environ.get('EMAIL_USERNAME', '')
EMAIL_PASSWORD = os.environ.get('EMAIL_PASSWORD', '')
TOKEN_DISCORD = os.environ.get('TOKEN_DISCORD', '') # Not implemented
TOKEN_SLACK = os.environ.get('TOKEN_SLACK', '')
TOKEN_TELEGRAM = os.environ.get('TOKEN_TELEGRAM', '')
DB_HOST = os.environ.get('DB_HOST', '')
DB_USERNAME = os.environ.get('DB_USERNAME', '')
DB_PASSWORD = os.environ.get('DB_PASSWORD', '')
DB_NAME = os.environ.get('DB_NAME', '')
DB_PORT = os.environ.get('DB_PORT', '')
FEEDBACK_FORM_URL = os.environ.get('FEEDBACK_FORM_URL', '')


## Pre-flight check
Expand Down
27 changes: 22 additions & 5 deletions node_monitor/node_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Deque, List, Dict, Optional
from toolz import groupby # type: ignore
import schedule
import logging

import node_monitor.ic_api as ic_api
from node_monitor.bot_email import EmailBot
Expand Down Expand Up @@ -74,6 +75,7 @@ def _resync(self, override_data: ic_api.Nodes | None = None) -> None:
override_data: If provided, this arg will be used instead of
live fetching Nodes from the ic-api. Useful for testing.
"""
logging.info("Resyncing node states from ic-api...")
data = override_data if override_data else ic_api.get_nodes()
self.snapshots.append(data)
self.last_update = time.time()
Expand Down Expand Up @@ -111,15 +113,22 @@ def broadcast_alerts(self) -> None:
# - - - - - - - - - - - - - - - - -
if preferences['notify_email'] == True:
recipients = email_recipients[node_provider_id]
logging.info(f"Sending alert email message to {recipients}...")
self.email_bot.send_emails(recipients, subject, message)
if preferences['notify_slack'] == True:
if self.slack_bot is not None:
channel_name = channels[node_provider_id]['slack_channel_name']
self.slack_bot.send_message(channel_name, message)
logging.info(f"Sending alert slack message to {channel_name}...")
err1 = self.slack_bot.send_message(channel_name, message)
if err1 is not None:
logging.error(f"SlackBot.send_message() failed with error: {err1}")
if preferences['notify_telegram_chat'] == True:
if self.telegram_bot is not None:
chat_id = channels[node_provider_id]['telegram_chat_id']
self.telegram_bot.send_message(chat_id, message)
logging.info(f"Sending alert telegram message to {chat_id}...")
err2 = self.telegram_bot.send_message(chat_id, message)
if err2 is not None:
logging.error(f"TelegramBot.send_message() failed with error: {err2}")
# - - - - - - - - - - - - - - - - -


Expand All @@ -141,20 +150,28 @@ def broadcast_status_report(self) -> None:
if k in subscribers.keys()}
# - - - - - - - - - - - - - - - - -
for node_provider_id, nodes in reportable_nodes.items():
logging.info(f"Broadcasting status report {node_provider_id}...")
preferences = subscribers[node_provider_id]
subject, message = messages.nodes_status_message(nodes, node_labels)
# - - - - - - - - - - - - - - - - -
if preferences['notify_email'] == True:
recipients = email_recipients[node_provider_id]
logging.info(f"Sending status report email to {recipients}...")
self.email_bot.send_emails(recipients, subject, message)
if preferences['notify_slack'] == True:
if self.slack_bot is not None:
channel_name = channels[node_provider_id]['slack_channel_name']
self.slack_bot.send_message(channel_name, message)
logging.info(f"Sending status report slack message to {channel_name}...")
err1 = self.slack_bot.send_message(channel_name, message)
if err1 is not None:
logging.error(f"SlackBot.send_message() failed with error: {err1}")
if preferences['notify_telegram_chat'] == True:
if self.telegram_bot is not None:
chat_id = channels[node_provider_id]['telegram_chat_id']
self.telegram_bot.send_message(chat_id, message)
logging.info(f"Sending status report telegram message to {chat_id}...")
err2 = self.telegram_bot.send_message(chat_id, message)
if err2 is not None:
logging.error(f"TelegramBot.send_message() failed with error: {err2}")
# - - - - - - - - - - - - - - - - -


Expand All @@ -167,7 +184,7 @@ def step(self) -> None:
self._analyze()
self.broadcast_alerts()
except Exception as e:
print(f"NodeMonitor.step() failed with error: {e}")
logging.error(f"NodeMonitor.step() failed with error: {e}")


def mainloop(self) -> None:
Expand Down
17 changes: 11 additions & 6 deletions node_monitor/node_monitor_helpers/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,17 @@
from typing import List, Dict, Tuple

import node_monitor.ic_api as ic_api
import node_monitor.load_config as c

# Forgive me Lord Guido, for I have broken PEP8.
Principal = str

def datetime_iso8601() -> str:
"""Returns the current time in ISO 8601 format, excluding milliseconds.
Example: 2021-05-01T00:00:00.
"""
return datetime.utcnow().isoformat(timespec='seconds')


def detailnode(node: ic_api.Node, label: str) -> str:
"""Returns:
Expand Down Expand Up @@ -73,7 +80,7 @@ def _make_subject() -> str:
f"{formatted_nodes_down}\n"
f"\n"
f"Node Monitor by Aviate Labs\n"
f"Report Generated: {datetime.utcnow().isoformat()} UTC\n"
f"Report Generated: {datetime_iso8601()} UTC\n"
f"Help us serve you better! Provide your feedback!\n")
return (subject, message)

Expand All @@ -96,7 +103,7 @@ def _make_diagnostic_message() -> str:
case 0: return ""
case _: return (f"🛑 Node(s) Compromised:\n"
f"\n"
f"{detailnodes(nodes_down, labels)}\n")
f"{detailnodes(nodes_down, labels)}\n\n")
def _make_subject() -> str:
datacenters = {node.dc_id.upper() for node in nodes_down}
match len(nodes_down):
Expand All @@ -109,10 +116,8 @@ def _render_frac(numerator: int, denominator: int) -> str:
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
subject = _make_subject()
message = (
f"{_make_diagnostic_message()}\n"
f"\n"
f"{_make_diagnostic_message()}"
f"🔎 Node Status Breakdown:\n"
f"Total Nodes: { total_nodes }\n"
f"Nodes Up: { _render_frac(len(nodes_up ), total_nodes) }\n"
f"Nodes Down: { _render_frac(len(nodes_down), total_nodes) }\n"
f"Nodes Unassigned: { _render_frac(len(nodes_unassigned), total_nodes) }\n"
Expand All @@ -124,6 +129,6 @@ def _render_frac(numerator: int, denominator: int) -> str:
f"\n"
f"Thanks for reviewing today's report. We'll be back tomorrow!\n"
f"Node Monitor by Aviate Labs.\n"
f"Report generated: {datetime.utcnow().isoformat()} UTC\n"
f"Report generated: {datetime_iso8601()} UTC\n"
f"Help us serve you better! Provide your feedback!\n")
return (subject, message)
1 change: 1 addition & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
## --db is a custom flag to test CRUD operations on the database
## example: pytest -s --send_emails tests/test_bot_email.py
## example: pytest -s --send_slack tests/test_bot_slack.py
## example: pytest -s --send_telegram tests/test_bot_telegram.py
## example: pytest -s --db tests/test_node_provider_db.py


Expand Down
19 changes: 12 additions & 7 deletions tests/test_bot_email.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,21 @@ def test_send_emails_network():
## Init the authenticated email bot instance
email_bot = EmailBot(c.EMAIL_USERNAME, c.EMAIL_PASSWORD)

## Set paramaters (uses an anonymous email inbox for testing)
## Send out nodes_down_message and nodes_status_message. We test both.
## We append time to the subject to act as an identifier for the test.
## Set recipient (we use an anonymous email inbox for testing)
recipients = ['[email protected]']
subject1, message1 = messages.nodes_down_message([fakenode], fakelabel)
subject1 = str(f'{time.time()} - {subject1}')
email_bot.send_emails(recipients, subject1, message1)

subject2, message2 = messages.nodes_status_message([fakenode], fakelabel)
## Create the messages. We use unittest.mock.patch to remove the private URL.
with patch.object(c, 'FEEDBACK_FORM_URL', 'https://url-has-been-redacted.ninja'):
subject1, message1 = messages.nodes_down_message([fakenode], fakelabel)
subject2, message2 = messages.nodes_status_message([fakenode], fakelabel)

## Append the time to the subject to act as an identifier for the test,
## making it easy to do a regex search to validate the email.
subject1 = str(f'{time.time()} - {subject1}')
subject2 = str(f'{time.time()} - {subject2}')

## Send both nodes_down_message and nodes_status_message as emails.
email_bot.send_emails(recipients, subject1, message1)
email_bot.send_emails(recipients, subject2, message2)

## Automatically check the email inbox
Expand Down
11 changes: 5 additions & 6 deletions tests/test_bot_slack.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import pytest
from unittest.mock import patch

import node_monitor.load_config as c
from node_monitor.bot_slack import SlackBot
import node_monitor.load_config as c


@patch("slack_sdk.WebClient")
Expand All @@ -24,13 +24,12 @@ def test_send_message(mock_web_client):
def test_send_message_slack():
"""Send a real test message to a Slack workspace"""
slack_bot = SlackBot(c.TOKEN_SLACK)

slack_channel_name = "node-monitor"
message = "🔬 Hello from test_send_message_slack()"
message = "🔬 This is a test message from Node Monitor"

# SlackBot.send_message() returns an error without raising an exception
# to prevent NodeMonitor from crashing if the message fails to send.
# Instead, we raise it here.
## SlackBot.send_message() normally returns an error without raising
## an exception to prevent NodeMonitor from crashing if the message
## fails to send. We make sure to raise it here to purposely fail the test.
err = slack_bot.send_message(slack_channel_name, message)
if err is not None:
raise err
3 changes: 2 additions & 1 deletion tests/test_bot_telegram.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ def test_send_message(mock_get):
def test_send_live_message():
telegram_bot = TelegramBot(c.TOKEN_TELEGRAM)
chat_id = "-1001925583150"
message = "Test message"
message = "🔬 This is a test message from Node Monitor"

err = telegram_bot.send_message(chat_id, message)
if err is not None:
raise err

0 comments on commit 67a461a

Please sign in to comment.