Skip to content

Commit

Permalink
feat: add snapshot Verification service
Browse files Browse the repository at this point in the history
Signed-off-by: samuelarogbonlo <[email protected]>
  • Loading branch information
samuelarogbonlo committed Oct 11, 2023
1 parent 1048635 commit 8f53375
Show file tree
Hide file tree
Showing 11 changed files with 504 additions and 0 deletions.
171 changes: 171 additions & 0 deletions terraform/modules/verify_snapshot/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
# This terraform script executes the following steps:
# - Pack the ruby and shell script files into a tar archive (the hash of this
#   archive is used to determine when to re-deploy the service)
# - Boot a new droplet
# - Copy over the tar archive
# - Run the init.sh script in the background

# Toolchain constraints: the Terraform CLI version and the providers that the
# resources and data sources of this module come from.
terraform {
  required_version = "~> 1.3"

  required_providers {
    # Source of the `local_file` data sources.
    local = {
      version = "~> 2.1"
      source  = "hashicorp/local"
    }

    # Source of the `external` data source that runs prep_sources.sh.
    external = {
      version = "~> 2.1"
      source  = "hashicorp/external"
    }

    # Source of the droplet, firewall, project and SSH key objects.
    digitalocean = {
      version = "~> 2.0"
      source  = "digitalocean/digitalocean"
    }
  }
}

# The DigitalOcean provider authenticates with the token handed to this module.
provider "digitalocean" {
  token = var.digitalocean_token
}

# Workaround: 'archive_file' cannot mix files and folders, so the archive is
# built by an external shell script instead. The script reports the location
# of the archive through the `path` key of its JSON result.
data "external" "sources_tar" {
  program = [
    "sh",
    "${path.module}/prep_sources.sh",
    path.module,
  ]
}

# Load the archive produced above so that its content hash can be referenced.
data "local_file" "sources" {
  filename = data.external.sources_tar.result.path
}

// The init.sh file is declared here even though it is already packed into
// the sources archive, whose hash therefore captures the entire state of the
// machine. Nothing references this declaration, which is why the tflint
// warning about unused declarations is suppressed for it.
// tflint-ignore: terraform_unused_declarations
data "local_file" "init" {
  filename = "${path.module}/service/init.sh"
}

# SSH key lookup (no filter applied), ordered by key name ascending. The
# fingerprints of the result are installed on the droplet.
data "digitalocean_ssh_keys" "keys" {
  sort {
    direction = "asc"
    key       = "name"
  }
}

# Render the environment file for the service from its template. Each key
# below is a variable made available to service/forest-env.tpl.
locals {
  env_content = templatefile(
    "${path.module}/service/forest-env.tpl",
    {
      BASE_FOLDER          = "/root",
      forest_tag           = var.forest_tag,
      slack_channel        = var.slack_channel,
      slack_token          = var.slack_token,
      NEW_RELIC_ACCOUNT_ID = var.NEW_RELIC_ACCOUNT_ID,
      NEW_RELIC_API_KEY    = var.NEW_RELIC_API_KEY,
      NEW_RELIC_REGION     = var.NEW_RELIC_REGION,
    }
  )
}

# Shell commands executed, in order, by the remote-exec provisioner on the
# freshly booted droplet. Their hash is part of the droplet's user_data, so
# editing this list re-creates the droplet.
locals {
  init_commands = [
    "cd /root/",
    "tar xf sources.tar",
    # Set required environment variables
    "echo '${local.env_content}' >> /root/.forest_env",
    "echo '. ~/.forest_env' >> .bashrc",
    ". ~/.forest_env",
    # Redirect stderr as well as stdout: init.sh runs with `set -x`, whose
    # trace goes to stderr. Without the redirect the trace is missing from
    # init_log.txt and the background script keeps a handle on the SSH
    # session that is about to close.
    "nohup sh ./init.sh > init_log.txt 2>&1 &",
    # Exiting without a sleep sometimes kills the script :-/
    "sleep 60s"
  ]
}

# Droplet hosting the snapshot verification service.
resource "digitalocean_droplet" "forest" {
  name   = var.name
  image  = var.image
  size   = var.size
  region = var.region
  tags   = ["iac"]

  monitoring        = true
  graceful_shutdown = false
  ssh_keys          = data.digitalocean_ssh_keys.keys.ssh_keys[*].fingerprint

  # Re-initialize the resource whenever this value changes. It joins the hash
  # of the sources archive with the hash of the init commands.
  user_data = join("-", [
    data.local_file.sources.content_sha256,
    sha256(join("", local.init_commands)),
  ])

  # The provisioners below log in as root over SSH.
  connection {
    type = "ssh"
    user = "root"
    host = self.ipv4_address
  }

  # Upload the sources.tar file to the newly booted droplet
  provisioner "file" {
    destination = "/root/sources.tar"
    source      = data.local_file.sources.filename
  }

  # Unpack the sources and start the service initialization.
  provisioner "remote-exec" {
    inline = local.init_commands
  }
}


# Look up the DigitalOcean project by the name given to the module.
data "digitalocean_project" "forest_project" {
  name = var.project
}

# Attach the droplet to the forest project; otherwise it ends up in
# "ChainBridge", which is the default project.
resource "digitalocean_project_resources" "connect_forest_project" {
  resources = [digitalocean_droplet.forest.urn]
  project   = data.digitalocean_project.forest_project.id
}

# Firewall attached to the droplet. Inbound traffic is accepted from
# var.source_addresses on the listed ports only; outbound traffic is allowed
# towards var.destination_addresses.
resource "digitalocean_firewall" "forest-firewall" {
  name        = var.name
  droplet_ids = [digitalocean_droplet.forest.id]

  # --- Inbound ---

  inbound_rule {
    source_addresses = var.source_addresses
    protocol         = "tcp"
    port_range       = "22"
  }

  inbound_rule {
    source_addresses = var.source_addresses
    protocol         = "tcp"
    port_range       = "2345"
  }

  inbound_rule {
    source_addresses = var.source_addresses
    protocol         = "tcp"
    port_range       = "80"
  }

  inbound_rule {
    source_addresses = var.source_addresses
    protocol         = "udp"
    port_range       = "53"
  }

  # --- Outbound ---

  outbound_rule {
    destination_addresses = var.destination_addresses
    protocol              = "tcp"
    port_range            = "all"
  }

  outbound_rule {
    destination_addresses = var.destination_addresses
    protocol              = "udp"
    port_range            = "53"
  }
}

# IPv4 address of the droplet, wrapped in a single-element list. It may be
# used in the future by monitoring software.
output "ip" {
  value = [
    digitalocean_droplet.forest.ipv4_address,
  ]
}
11 changes: 11 additions & 0 deletions terraform/modules/verify_snapshot/prep_sources.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash

# Copy local source files in a folder together with ruby_common and create a
# tar archive with sorted entries and a fixed mtime. On success, the only
# output on stdout is a JSON object holding the path of the archive.
#
# Usage: prep_sources.sh <module-directory>

cd "$1" || exit

# Always remove the temporary copy of ruby_common on exit, including when the
# copy or the archiving step fails.
trap 'rm -fr service/ruby_common' EXIT

cp -Rp ../../../scripts/ruby_common service/ || exit

# The --sort and --mtime options need GNU tar. Prefer `gtar` when it is
# installed and fall back to `tar` otherwise, so hosts without a `gtar`
# binary can still build the archive.
if command -v gtar > /dev/null 2>&1; then
  TAR=gtar
else
  TAR=tar
fi

rm -f sources.tar
(cd service && "$TAR" cf ../sources.tar --sort=name --mtime='UTC 2019-01-01' ./* > /dev/null 2>&1) || exit
echo "{ \"path\": \"$1/sources.tar\" }"
6 changes: 6 additions & 0 deletions terraform/modules/verify_snapshot/service/forest-env.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Working directory of the service. Read by verify_snapshot_cron_job and verify_snapshot.rb.
export BASE_FOLDER="${BASE_FOLDER}"
# Slack credentials under the names read by verify_snapshot.rb and verify_snapshot.sh.
export SLACK_API_TOKEN="${slack_token}"
export SLACK_NOTIF_CHANNEL="${slack_channel}"
# Prefixed names kept for backward compatibility.
export FOREST_SLACK_API_TOKEN="${slack_token}"
export FOREST_SLACK_NOTIF_CHANNEL="${slack_channel}"
export NEW_RELIC_API_KEY="${NEW_RELIC_API_KEY}"
export NEW_RELIC_ACCOUNT_ID="${NEW_RELIC_ACCOUNT_ID}"
export NEW_RELIC_REGION="${NEW_RELIC_REGION}"
export FOREST_TAG="${forest_tag}"
24 changes: 24 additions & 0 deletions terraform/modules/verify_snapshot/service/init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash

# Prepare the machine for the snapshot verification service: install the
# system packages and Ruby gems, create the snapshot working directory and
# register the hourly cron job. Expected to run from the directory where the
# sources archive was unpacked.

set -eux

# Wait for cloud-init to finish initializing the machine
cloud-init status --wait

# Setting DEBIAN_FRONTEND to ensure non-interactive operations for APT
export DEBIAN_FRONTEND=noninteractive

# Use APT specific mechanism to wait for the lock: retry for up to 180 seconds
# instead of failing immediately when another process holds the dpkg lock.
apt-get -qqq --yes -o DPkg::Lock::Timeout=180 update
apt-get -qqq --yes -o DPkg::Lock::Timeout=180 install ruby ruby-dev anacron awscli zstd

# Install the gems
gem install docker-api slack-ruby-client
gem install activesupport -v 7.0.8

# -p keeps the script from aborting (set -e) when the directory already exists.
mkdir -p snapshot
chmod 777 snapshot

cp verify_snapshot_cron_job /etc/cron.hourly/
# Make sure the job is executable regardless of the mode it was packed with.
chmod 755 /etc/cron.hourly/verify_snapshot_cron_job
35 changes: 35 additions & 0 deletions terraform/modules/verify_snapshot/service/verify_snapshot.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# frozen_string_literal: true

require_relative 'ruby_common/slack_client'
require_relative 'ruby_common/docker_utils'
require_relative 'ruby_common/utils'

require 'date'
require 'logger'
require 'fileutils'
require 'active_support/time'

# Required configuration, read from the environment.
BASE_FOLDER = get_and_assert_env_variable 'BASE_FOLDER'
SLACK_TOKEN = get_and_assert_env_variable 'SLACK_API_TOKEN'
CHANNEL = get_and_assert_env_variable 'SLACK_NOTIF_CHANNEL'

# Current datetime, to append to the log files
DATE = Time.new.strftime '%FT%H:%M:%S'
LOG_EXPORT_SCRIPT_RUN = "mainnet_#{DATE}_script_run.txt"

client = SlackClient.new CHANNEL, SLACK_TOKEN

# conditionally add timestamps to logs without timestamps
add_timestamps_cmd = "awk '{ if ($0 !~ /^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\\.[0-9]{6}Z/) print strftime(\"[%Y-%m-%d %H:%M:%S]\"), $0; else print $0; fflush(); }'"

# Run the verification script (killed after 24h) and write its timestamped
# output to the log file.
#
# `pipefail` makes the pipeline report the exit status of verify_snapshot.sh;
# without it the status is that of `awk`, so a failed verification would be
# reported as a success. The script's stderr is merged into the pipe so that
# error output ends up in the log as well. The pipeline is handed to bash as a
# single argument, so no extra layer of shell quoting is needed.
verify_pipeline = 'set -o pipefail; timeout --signal=KILL 24h ./verify_snapshot.sh 2>&1 | ' \
                  "#{add_timestamps_cmd} > #{LOG_EXPORT_SCRIPT_RUN} 2>&1"
snapshot_uploaded = system('bash', '-c', verify_pipeline)

if snapshot_uploaded
  puts "✅ Verification of Mainnet Snapshot Successful. 🌲🌳🌲🌳🌲"
else
  client.post_message "⛔ Verification of Mainnet Snapshot failed. 🔥🌲🔥 "
  # attach the log file and print the contents to STDOUT
  client.attach_files(LOG_EXPORT_SCRIPT_RUN)
end
42 changes: 42 additions & 0 deletions terraform/modules/verify_snapshot/service/verify_snapshot.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash

# Fetch the filops mainnet snapshot inside a Forest container, re-export it
# with forest-tool and compare the export byte-for-byte with the original.
# Exits non-zero when the container run fails or the snapshots differ.
#
# Environment:
#   SLACK_API_TOKEN      token used to post the alert
#   SLACK_NOTIF_CHANNEL  channel the alert is posted to
#   FOREST_TAG           tag of the Forest image to run (default: latest)

set -euo pipefail

SNAPSHOT_DIR=/root/snapshot
FOREST_IMAGE="ghcr.io/chainsafe/forest:${FOREST_TAG:-latest}"

# Function to send Slack alert
# $1: text of the message
send_slack_alert() {
  local message="$1"
  curl -X POST -H 'Content-type: application/json' -H "Authorization: Bearer $SLACK_API_TOKEN" \
    --data "{\"channel\":\"${SLACK_NOTIF_CHANNEL}\",\"text\":\"${message}\"}" \
    https://slack.com/api/chat.postMessage
}

# Remove the artifacts of previous runs. Leftovers would make `zstd -d` refuse
# to overwrite its output and would make the `filops_*.car` glob below match
# more than one file.
rm -f "$SNAPSHOT_DIR"/*.car "$SNAPSHOT_DIR"/*.car.zst

# Commands executed inside the container.
COMMANDS=$(cat << HEREDOC
set -eux
apt-get update && apt-get install -y zstd
cd snapshot/
forest-tool snapshot fetch --vendor filops --chain mainnet
forest-tool snapshot fetch --vendor forest --chain mainnet
zstd -d filops_*.car.zst
forest-tool archive export filops_*.car -o exported_snapshot.car.zst
HEREDOC
)

docker run \
  --name compare-snapshot \
  --rm \
  --user root \
  --volume="$SNAPSHOT_DIR":/home/forest/snapshot \
  --entrypoint /bin/bash \
  "$FOREST_IMAGE" \
  -c "$COMMANDS" || exit 1

cd "$SNAPSHOT_DIR"
zstd -d exported_snapshot.car.zst

if cmp --silent filops_*.car exported_snapshot.car; then
  echo "Snapshots are identical."
else
  echo "Snapshot not identical"
  send_slack_alert "Checksum failed. 🔥🌲🔥. Snapshots do not match byte-for-byte with filops snapshot"
  # Report the mismatch to the caller as a failure.
  exit 1
fi
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash

# Hourly entry point of the snapshot verification. Loads the service
# environment, switches to the service folder and runs verify_snapshot.rb
# unless a previous run still holds the lock.

# shellcheck source=/dev/null
source ~/.forest_env
# Abort with an explicit message when BASE_FOLDER is unset or empty; a bare
# `cd ""` would succeed and leave the job in the wrong directory.
cd "${BASE_FOLDER:?BASE_FOLDER is not set, check ~/.forest_env}" || exit
# -n: give up immediately instead of queueing behind a run that is in progress.
flock -n /tmp/mainnet.lock -c "ruby verify_snapshot.rb > verify_log.txt 2>&1" || exit
78 changes: 78 additions & 0 deletions terraform/modules/verify_snapshot/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# DigitalOcean API token used by the provider. Marked sensitive so that
# Terraform redacts it in plan and apply output.
variable "digitalocean_token" {
  description = "Token for authentication."
  type        = string
  sensitive   = true
}

# Name given to the droplet and to its firewall.
variable "name" {
  type        = string
  description = "The name of Forest Droplet"
}

# Size of the droplet.
variable "size" {
  type        = string
  description = "The size of the droplet instance to launch"
}

# Slack channel passed to the service environment template.
variable "slack_channel" {
  type        = string
  description = "slack channel name for notifications"
}

# Slack token written to the service environment file. Marked sensitive so
# that Terraform redacts it, and values derived from it, in plan and apply
# output.
variable "slack_token" {
  description = "slack access token"
  type        = string
  sensitive   = true
}

# Forest image tag passed to the service environment template.
variable "forest_tag" {
  type        = string
  default     = "latest"
  description = "Image tag for the Forest container"
}

# Image the droplet is created from.
variable "image" {
  type        = string
  default     = "docker-20-04"
  description = "The ID of the AMI to use for the Droplet"
}

# Region of the droplet.
variable "region" {
  type        = string
  default     = "fra1"
  description = "The region where resources will be created"
}

# Name of the DigitalOcean project the droplet is attached to.
variable "project" {
  type        = string
  default     = "Forest-DEV" # Alternative: "Default"
  description = "DigitalOcean project used as parent for the created droplet"
}

# Addresses allowed by the inbound firewall rules.
variable "source_addresses" {
  type        = list(string)
  default     = ["0.0.0.0/0", "::/0"]
  description = "List of source addresses."
}

# Addresses allowed by the outbound firewall rules.
variable "destination_addresses" {
  type        = list(string)
  default     = ["0.0.0.0/0", "::/0"]
  description = "List of destination addresses."
}

# New Relic region passed to the service environment template.
variable "NEW_RELIC_REGION" {
  type        = string
  default     = "EU"
  description = "The New Relic Platform Region"
}

# New Relic API key written to the service environment file. Marked sensitive
# so that Terraform redacts it in plan and apply output.
variable "NEW_RELIC_API_KEY" {
  description = "New Relic API KEY"
  default     = ""
  type        = string
  sensitive   = true
}

# New Relic account ID passed to the service environment template. Empty by
# default.
variable "NEW_RELIC_ACCOUNT_ID" {
  type        = string
  default     = ""
  description = "The New Relic Account ID"
}
Loading

0 comments on commit 8f53375

Please sign in to comment.