Fix object backup #29

Merged
merged 3 commits on Sep 9, 2024
32 changes: 32 additions & 0 deletions .github/workflows/black.yml
@@ -0,0 +1,32 @@
name: Check Python code format

on:
  push:
    branches:
      - '**'
    tags:
      - '*'
  pull_request:
    branches:
      - '**'

jobs:
  black:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v2

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          pip install black

      - name: Check formatting with black
        run: |
          black --check .
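
For context, `black --check .` exits with a non-zero status when any file would be reformatted, which is what makes the job fail. A minimal sketch of running the same check locally from Python; this wrapper is not part of the PR, just an illustration:

```python
import subprocess
import sys

# Run black in check-only mode; it reports files it would reformat without changing them.
result = subprocess.run(["black", "--check", "."])

if result.returncode != 0:
    # black returns a non-zero exit code when formatting changes are needed.
    print("Formatting check failed: run 'black .' to reformat.")
    sys.exit(result.returncode)

print("All files are formatted correctly.")
```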
46 changes: 27 additions & 19 deletions backups/grafana/grafana-backup.py
@@ -15,25 +15,25 @@
print(f"failed to find config file '{CONFIG_FILE}'")
sys.exit()

with open(CONFIG_FILE, 'r') as f:
with open(CONFIG_FILE, "r") as f:
config = yaml.load(f, Loader=yaml.FullLoader)

print("Running grafana backup")

# File paths for Grafana configuration, database files, and plugins folder
GRAFANA_INI_FILE = '/etc/grafana/grafana.ini'
GRAFANA_DB_FILE = '/var/lib/grafana/grafana.db'
GRAFANA_PLUGINS_FOLDER = '/var/lib/grafana/plugins/'
GRAFANA_INI_FILE = "/etc/grafana/grafana.ini"
GRAFANA_DB_FILE = "/var/lib/grafana/grafana.db"
GRAFANA_PLUGINS_FOLDER = "/var/lib/grafana/plugins/"

# Temporary backup file names
TMP_BACKUP_INI_FILE = '/tmp/grafana_backup_ini.ini'
TMP_BACKUP_DB_FILE = '/tmp/grafana_backup_db.db'
TMP_BACKUP_PLUGINS_ZIP = '/tmp/grafana_backup_plugins.zip'
TMP_BACKUP_INI_FILE = "/tmp/grafana_backup_ini.ini"
TMP_BACKUP_DB_FILE = "/tmp/grafana_backup_db.db"
TMP_BACKUP_PLUGINS_ZIP = "/tmp/grafana_backup_plugins.zip"

# Backup path target file names
BACKUP_INI_FILE = 'grafana.ini'
BACKUP_DB_FILE = 'grafana.db'
BACKUP_PLUGINS_ZIP = 'grafana_plugins.zip'
BACKUP_INI_FILE = "grafana.ini"
BACKUP_DB_FILE = "grafana.db"
BACKUP_PLUGINS_ZIP = "grafana_plugins.zip"

# Copy files to temporary backup files
print("Copying files to temporary backup files")
@@ -42,20 +42,25 @@

# Zip plugins folder
print("Zipping plugins folder")
archive_path, _ = os.path.splitext(TMP_BACKUP_PLUGINS_ZIP) # Remove extension as make_archive will add it
shutil.make_archive(archive_path, 'zip', GRAFANA_PLUGINS_FOLDER)
archive_path, _ = os.path.splitext(
TMP_BACKUP_PLUGINS_ZIP
) # Remove extension as make_archive will add it
shutil.make_archive(archive_path, "zip", GRAFANA_PLUGINS_FOLDER)

# Initialize Backblaze B2 API
print("Initializing Backblaze B2 API")
info = InMemoryAccountInfo()
b2_api = B2Api(info)
b2_api.authorize_account("production", config["b2"]["app_key_id"], config["b2"]["app_key"])
b2_api.authorize_account(
"production", config["b2"]["app_key_id"], config["b2"]["app_key"]
)
bucket = b2_api.get_bucket_by_name(config["b2"]["bucket"])


# Upload file to Backblaze B2 and delete original file
def upload_to_b2(file_name, object_name):
try:
with open(file_name, 'rb') as f:
with open(file_name, "rb") as f:
data = f.read()
source = UploadSourceBytes(data)
bucket.upload(source, object_name)
@@ -65,7 +70,8 @@ def upload_to_b2(file_name, object_name):
except Exception as e:
print(f"File {file_name} could not be uploaded to Backblaze B2. Error: {e}")
return False



print("Uploading files to Backblaze B2")
success = True
success &= upload_to_b2(TMP_BACKUP_INI_FILE, BACKUP_INI_FILE)
@@ -78,16 +84,18 @@ def upload_to_b2(file_name, object_name):
print("Grafana backup failed")

print("Logging to influx")
json_body = [{
json_body = [
{
"measurement": "backup",
"tags": {
"host": HOST_NAME,
},
"fields": {
"success": 1.0 if success else 0.0,
}
}]
client = InfluxDBClient(**config['influx'])
},
}
]
client = InfluxDBClient(**config["influx"])
print(json_body)
client.write_points(json_body)

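The grafana backup ends by folding the result of each upload into a single success flag and writing one point to InfluxDB. A minimal standalone sketch of that reporting step, assuming the InfluxDB 1.x `influxdb` client the script already uses; the connection settings and host name here are illustrative placeholders:

```python
from influxdb import InfluxDBClient

# Illustrative configuration; the real script loads this from its YAML config file.
config = {"influx": {"host": "localhost", "port": 8086, "database": "backups"}}
HOST_NAME = "grafana-host"  # placeholder host name

success = True
# In the real script each upload step folds its result into the flag, e.g.:
#   success &= upload_to_b2(TMP_BACKUP_INI_FILE, BACKUP_INI_FILE)

json_body = [
    {
        "measurement": "backup",
        "tags": {"host": HOST_NAME},
        # Stored as 1.0/0.0 so the value can be graphed and alerted on.
        "fields": {"success": 1.0 if success else 0.0},
    }
]

client = InfluxDBClient(**config["influx"])
client.write_points(json_body)
```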
33 changes: 25 additions & 8 deletions backups/object-backup/object-backup.py
@@ -74,22 +74,32 @@ def get_archive_key(key):
return os.path.join(config["archive"]["prefix"], key)


def dont_backup_key(key):
if key.endswith("-thumb"):
return True


# It is very easy to configure this to upload to the wrong bucket, so this checks that at least 80
# out of a random sample of 100 recordings are already on the target bucket, meaning it is probably the correct bucket.
## TODO: Make an API request to the server to get a random sample of keys from the target bucket.
print(
"Checking that some files already match, as a way of verifying that the correct bucket/prefix is being used."
)
keys = []
i = 0
for obj in local_bucket.objects.page_size(10000):
keys_sample_size = 10000
for obj in local_bucket.objects.page_size(1000):
Review comment: is reducing 10000 to 1000 intended?

Contributor Author: Yeah that is fine, just changes how many objects it gets in one query/page.
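
For reference, `page_size()` on a boto3 bucket collection only controls how many keys each underlying list request fetches; the loop still walks every object until it breaks out on its own, so 1000 versus 10000 changes the API call granularity, not the sample. A small sketch under that assumption; the bucket name is illustrative:

```python
import boto3

s3 = boto3.resource("s3")
local_bucket = s3.Bucket("example-recordings")  # illustrative bucket name

keys_sample_size = 10000
keys = []

# page_size(1000) fetches up to 1,000 keys per list request; iteration itself is unchanged.
for obj in local_bucket.objects.page_size(1000):
    keys.append(obj.key)
    if len(keys) >= keys_sample_size:
        break

print(f"Collected {len(keys)} keys")
```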

if dont_backup_key(obj.key):
continue
keys.append(obj.key)
i += 1
if i >= 10000:
if i >= keys_sample_size:
break

random_keys = []
for i in range(100):
random_keys.append(random.choice(keys))
random.shuffle(keys)

# Select the first 100 random keys after shuffling
random_keys = keys[:100]

matching = 0

@@ -106,9 +116,16 @@ def check_matching_key(key):
for key in random_keys:
executor.submit(check_matching_key, key)

if matching < 50:
minimum_matching = 60
if matching < minimum_matching:
print(
f"{matching} out of 100 objects are already on the target bucket. Canceling backup."
textwrap.dedent(
f"""
Only {matching} out of 100 objects are already on the target bucket.
A minimum of {minimum_matching} matching objects is required. Canceling backup.
This can be caused by a bucket misconfiguration or by too small a key sample (current sample size: {keys_sample_size}).
"""
)
)
time.sleep(2)
sys.exit(0)
@@ -138,7 +155,7 @@ def handle_file(obj):
global file_changed_count
global matching_count
try:
if obj.key.endswith("-thumb"):
if dont_backup_key(obj.key):
return
archive_key = os.path.join(config["archive"]["prefix"], obj.key)
archive_obj = archive_bucket.Object(archive_key)
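The revised safety check shuffles the sampled keys and takes the first 100, which also avoids the duplicate picks that repeated `random.choice` calls could produce, then requires a minimum number of them to already exist on the archive bucket before the backup proceeds. A standalone sketch of that selection-and-threshold logic; the `key_exists_on_target` helper and the key list are hypothetical stand-ins:

```python
import random
import sys

def key_exists_on_target(key):
    # Hypothetical stand-in for the real existence check against the archive bucket.
    return True

keys = [f"recording-{i}" for i in range(10000)]  # illustrative sampled keys

random.shuffle(keys)
random_keys = keys[:100]  # 100 distinct keys rather than sampling with replacement

minimum_matching = 60
matching = sum(1 for key in random_keys if key_exists_on_target(key))

if matching < minimum_matching:
    print(f"Only {matching}/100 keys already exist on the target bucket. Canceling backup.")
    sys.exit(0)

print(f"{matching}/100 keys matched; proceeding with backup.")
```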
21 changes: 13 additions & 8 deletions backups/object-recover/object-recover.py
@@ -8,6 +8,7 @@
import os
from minio import Minio


def check_file_exists(minio_client, bucket_name, object_name):
try:
minio_client.stat_object(bucket_name, object_name)
@@ -50,11 +51,11 @@ def check_file_exists(minio_client, bucket_name, object_name):
object_keys = file_object_keys + raw_file_object_keys

minio_client = Minio(
minio["endpoint"],
access_key=minio["access_key"],
secret_key=minio["secret_key"],
secure=minio["http"],
)
minio["endpoint"],
access_key=minio["access_key"],
secret_key=minio["secret_key"],
secure=minio["http"],
)

print("Finding keys that are not in local object store")
transfers = []
@@ -69,24 +70,28 @@ def check_file_exists(minio_client, bucket_name, object_name):
completed_transfers = 0
lock = Lock()


def transfer_file(source, destination):
try:
subprocess.run(
["mc", "cp", "--quiet", source, destination],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=True)
check=True,
)
with lock:
global completed_transfers
completed_transfers += 1
print(f"{completed_transfers}/{len(transfers)} Transferred '{source}' to '{destination}'")
print(
f"{completed_transfers}/{len(transfers)} Transferred '{source}' to '{destination}'"
)
except subprocess.CalledProcessError as e:
print(f"Failed to transfer '{source}': {e}")


size = len(transfers)
print(f"Objects to recover: {size}")


with ThreadPoolExecutor(max_workers=20) as executor:
results = list(executor.map(lambda args: transfer_file(*args), transfers))
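
The recovery script decides which objects to copy by probing the local object store with `stat_object`. A minimal sketch of that existence check, assuming minio-py 7.x (where a missing object raises `S3Error`); the endpoint, credentials, bucket name, and keys are placeholders:

```python
from minio import Minio
from minio.error import S3Error

def check_file_exists(minio_client, bucket_name, object_name):
    # stat_object raises S3Error (e.g. NoSuchKey) when the object is absent.
    try:
        minio_client.stat_object(bucket_name, object_name)
        return True
    except S3Error:
        return False

# Placeholder connection details for illustration only.
minio_client = Minio(
    "minio.example.com:9000",
    access_key="ACCESS_KEY",
    secret_key="SECRET_KEY",
    secure=True,
)

object_keys = ["objects/a", "objects/b"]  # illustrative keys
missing = [k for k in object_keys if not check_file_exists(minio_client, "recordings", k)]
print(f"Objects to recover: {len(missing)}")
```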
