Merge pull request #29 from TheCacophonyProject/fix-object-backup
Fix object backup
CameronRP authored Sep 9, 2024
2 parents 57b46e3 + 8208cb7 commit 9d271de
Showing 10 changed files with 235 additions and 105 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/black.yml
@@ -0,0 +1,32 @@
name: Check Python code format

on:
  push:
    branches:
      - '**'
    tags:
      - '*'
  pull_request:
    branches:
      - '**'

jobs:
  black:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v2

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          pip install black
      - name: Check formatting with black
        run: |
          black --check .
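The workflow's check can be reproduced locally with the same `black --check .` command, or programmatically. A minimal sketch using black's Python API, with a hypothetical snippet standing in for the repository's files (assumes `pip install black`):

import black

# Hypothetical snippet to verify; the workflow checks every file in the repo instead.
source = 'with open("config.yaml", "r") as f:\n    data = f.read()\n'

# format_str returns the black-formatted version of the string.
formatted = black.format_str(source, mode=black.Mode())
print("already formatted" if formatted == source else "needs reformatting")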
46 changes: 27 additions & 19 deletions backups/grafana/grafana-backup.py
Original file line number Diff line number Diff line change
@@ -15,25 +15,25 @@
print(f"failed to find config file '{CONFIG_FILE}'")
sys.exit()

-with open(CONFIG_FILE, 'r') as f:
+with open(CONFIG_FILE, "r") as f:
    config = yaml.load(f, Loader=yaml.FullLoader)

print("Running grafana backup")

# File paths for Grafana configuration, database files, and plugins folder
-GRAFANA_INI_FILE = '/etc/grafana/grafana.ini'
-GRAFANA_DB_FILE = '/var/lib/grafana/grafana.db'
-GRAFANA_PLUGINS_FOLDER = '/var/lib/grafana/plugins/'
+GRAFANA_INI_FILE = "/etc/grafana/grafana.ini"
+GRAFANA_DB_FILE = "/var/lib/grafana/grafana.db"
+GRAFANA_PLUGINS_FOLDER = "/var/lib/grafana/plugins/"

# Temporary backup file names
-TMP_BACKUP_INI_FILE = '/tmp/grafana_backup_ini.ini'
-TMP_BACKUP_DB_FILE = '/tmp/grafana_backup_db.db'
-TMP_BACKUP_PLUGINS_ZIP = '/tmp/grafana_backup_plugins.zip'
+TMP_BACKUP_INI_FILE = "/tmp/grafana_backup_ini.ini"
+TMP_BACKUP_DB_FILE = "/tmp/grafana_backup_db.db"
+TMP_BACKUP_PLUGINS_ZIP = "/tmp/grafana_backup_plugins.zip"

# Backup path target file names
-BACKUP_INI_FILE = 'grafana.ini'
-BACKUP_DB_FILE = 'grafana.db'
-BACKUP_PLUGINS_ZIP = 'grafana_plugins.zip'
+BACKUP_INI_FILE = "grafana.ini"
+BACKUP_DB_FILE = "grafana.db"
+BACKUP_PLUGINS_ZIP = "grafana_plugins.zip"

# Copy files to temporary backup files
print("Copying files to temporary backup files")
@@ -42,20 +42,25 @@

# Zip plugins folder
print("Zipping plugins folder")
-archive_path, _ = os.path.splitext(TMP_BACKUP_PLUGINS_ZIP)  # Remove extension as make_archive will add it
-shutil.make_archive(archive_path, 'zip', GRAFANA_PLUGINS_FOLDER)
+archive_path, _ = os.path.splitext(
+    TMP_BACKUP_PLUGINS_ZIP
+)  # Remove extension as make_archive will add it
+shutil.make_archive(archive_path, "zip", GRAFANA_PLUGINS_FOLDER)

# Initialize Backblaze B2 API
print("Initializing Backblaze B2 API")
info = InMemoryAccountInfo()
b2_api = B2Api(info)
-b2_api.authorize_account("production", config["b2"]["app_key_id"], config["b2"]["app_key"])
+b2_api.authorize_account(
+    "production", config["b2"]["app_key_id"], config["b2"]["app_key"]
+)
bucket = b2_api.get_bucket_by_name(config["b2"]["bucket"])


# Upload file to Backblaze B2 and delete original file
def upload_to_b2(file_name, object_name):
    try:
-        with open(file_name, 'rb') as f:
+        with open(file_name, "rb") as f:
            data = f.read()
        source = UploadSourceBytes(data)
        bucket.upload(source, object_name)
@@ -65,7 +70,8 @@ def upload_to_b2(file_name, object_name):
    except Exception as e:
        print(f"File {file_name} could not be uploaded to Backblaze B2. Error: {e}")
        return False



print("Uploading files to Backblaze B2")
success = True
success &= upload_to_b2(TMP_BACKUP_INI_FILE, BACKUP_INI_FILE)
@@ -78,16 +84,18 @@ def upload_to_b2(file_name, object_name):
print("Grafana backup failed")

print("Logging to influx")
-json_body = [{
+json_body = [
+    {
"measurement": "backup",
"tags": {
"host": HOST_NAME,
},
"fields": {
"success": 1.0 if success else 0.0,
-        }
-}]
-client = InfluxDBClient(**config['influx'])
+        },
+    }
+]
+client = InfluxDBClient(**config["influx"])
print(json_body)
client.write_points(json_body)
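The os.path.splitext change above matters because shutil.make_archive appends the format's extension itself; passing the full .zip name would produce grafana_backup_plugins.zip.zip. A small sketch of the behavior, using a temporary directory as a stand-in for the plugins folder:

import os
import shutil
import tempfile

src_dir = tempfile.mkdtemp()  # stand-in for GRAFANA_PLUGINS_FOLDER

# make_archive adds ".zip" itself, so strip the extension from the target name first.
base, _ = os.path.splitext("/tmp/grafana_backup_plugins.zip")
archive = shutil.make_archive(base, "zip", src_dir)
print(archive)  # /tmp/grafana_backup_plugins.zip, not ...zip.zip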

33 changes: 25 additions & 8 deletions backups/object-backup/object-backup.py
@@ -74,22 +74,32 @@ def get_archive_key(key):
    return os.path.join(config["archive"]["prefix"], key)


+def dont_backup_key(key):
+    if key.endswith("-thumb"):
+        return True


# It is very easy to configure this script to upload to the wrong bucket, so this checks that at
# least 60 out of a random 100 recordings are already on the target bucket, meaning it's probably
# the correct bucket.
# TODO: Make an API request to the server to get a random sample of keys from the target bucket.
print(
    "Check that some files already match, as a way of checking that the correct bucket/prefix is being used."
)
keys = []
i = 0
-for obj in local_bucket.objects.page_size(10000):
+keys_sample_size = 10000
+for obj in local_bucket.objects.page_size(1000):
+    if dont_backup_key(obj.key):
+        continue
    keys.append(obj.key)
    i += 1
-    if i >= 10000:
+    if i >= keys_sample_size:
        break

-random_keys = []
-for i in range(100):
-    random_keys.append(random.choice(keys))
+random.shuffle(keys)
+
+# Select the first 100 random keys after shuffling
+random_keys = keys[:100]

matching = 0
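Shuffling and slicing, as above, samples 100 keys without replacement, whereas the old random.choice loop could pick the same key more than once. random.sample expresses the same intent in one call; a quick sketch with made-up keys:

import random

keys = [f"recording-{i}" for i in range(1000)]  # made-up keys for illustration

random.shuffle(keys)
sample_a = keys[:100]  # the approach used above: no duplicates

sample_b = random.sample(keys, 100)  # equivalent, and leaves keys unshuffled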

@@ -106,9 +116,16 @@ def check_matching_key(key):
for key in random_keys:
    executor.submit(check_matching_key, key)

-if matching < 50:
+minimum_matching = 60
+if matching < minimum_matching:
    print(
-        f"{matching} out of 100 objects are already on the target bucket. Canceling backup."
+        textwrap.dedent(
+            f"""
+            Only {matching} out of 100 objects are already on the target bucket, which is below the
+            required minimum of {minimum_matching}. Canceling backup.
+            This can be caused by a bucket misconfiguration, or by not having enough keys to sample
+            from (current sample size: {keys_sample_size}).
+            """
+        )
    )
    time.sleep(2)
    sys.exit(0)
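textwrap.dedent strips the indentation that the triple-quoted string inherits from the surrounding code, so the message prints flush-left; a short demonstration with example values in place of the f-string fields:

import textwrap

matching, minimum_matching = 42, 60  # example values
message = textwrap.dedent(
    f"""
    Only {matching} out of 100 objects are already on the target bucket, which is below the
    required minimum of {minimum_matching}. Canceling backup.
    """
)
print(message)  # the common leading whitespace has been removed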
@@ -138,7 +155,7 @@ def handle_file(obj):
    global file_changed_count
    global matching_count
    try:
-        if obj.key.endswith("-thumb"):
+        if dont_backup_key(obj.key):
            return
        archive_key = os.path.join(config["archive"]["prefix"], obj.key)
        archive_obj = archive_bucket.Object(archive_key)
21 changes: 13 additions & 8 deletions backups/object-recover/object-recover.py
@@ -8,6 +8,7 @@
import os
from minio import Minio


def check_file_exists(minio_client, bucket_name, object_name):
    try:
        minio_client.stat_object(bucket_name, object_name)
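The rest of check_file_exists is collapsed in this view. A typical shape for such a check with minio-py, assuming (hypothetically) that the hidden branch simply returns False on a missing object:

from minio.error import S3Error

def check_file_exists(minio_client, bucket_name, object_name):
    # stat_object raises S3Error (e.g. NoSuchKey) when the object is missing.
    # Hypothetical completion; the actual except branch is collapsed in the diff.
    try:
        minio_client.stat_object(bucket_name, object_name)
        return True
    except S3Error:
        return False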
@@ -50,11 +51,11 @@ def check_file_exists(minio_client, bucket_name, object_name):
object_keys = file_object_keys + raw_file_object_keys

minio_client = Minio(
minio["endpoint"],
access_key=minio["access_key"],
secret_key=minio["secret_key"],
secure=minio["http"],
)
minio["endpoint"],
access_key=minio["access_key"],
secret_key=minio["secret_key"],
secure=minio["http"],
)

print("Finding keys that are not in local object store")
transfers = []
@@ -69,24 +70,28 @@ def check_file_exists(minio_client, bucket_name, object_name):
completed_transfers = 0
lock = Lock()


def transfer_file(source, destination):
    try:
        subprocess.run(
            ["mc", "cp", "--quiet", source, destination],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
-            check=True)
+            check=True,
+        )
        with lock:
            global completed_transfers
            completed_transfers += 1
-            print(f"{completed_transfers}/{len(transfers)} Transferred '{source}' to '{destination}'")
+            print(
+                f"{completed_transfers}/{len(transfers)} Transferred '{source}' to '{destination}'"
+            )
    except subprocess.CalledProcessError as e:
        print(f"Failed to transfer '{source}': {e}")


size = len(transfers)
print(f"Objects to recover: {size}")


with ThreadPoolExecutor(max_workers=20) as executor:
    results = list(executor.map(lambda args: transfer_file(*args), transfers))
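executor.map passes one element of the iterable per call, so the lambda unpacks each (source, destination) tuple. A minimal sketch of the same pattern, with a stand-in for transfer_file and made-up paths:

from concurrent.futures import ThreadPoolExecutor

def transfer_file(source, destination):
    print(f"copy {source} -> {destination}")  # stand-in for the mc cp subprocess call

transfers = [("src/a", "dst/a"), ("src/b", "dst/b")]  # made-up paths

with ThreadPoolExecutor(max_workers=20) as executor:
    # map yields results lazily; list() forces all transfers to complete.
    results = list(executor.map(lambda args: transfer_file(*args), transfers))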
