Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] refact!: start working on moving DRS to s3-only backend #65

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 28 additions & 9 deletions chord_drs/backend.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import boto3
from flask import current_app, g

from chord_drs.backends.base import Backend
from chord_drs.data_sources import DATA_SOURCE_BACKENDS
from urllib.parse import urlparse


__all__ = [
Expand All @@ -10,15 +9,35 @@
]


def _get_backend() -> Backend | None:
    """Build the backend registered for the app's configured data source.

    Looks up ``SERVICE_DATA_SOURCE`` from the current Flask app config in
    ``DATA_SOURCE_BACKENDS`` and instantiates the matching class.

    Returns:
        A new backend instance, or ``None`` when no class is registered for
        the configured data source.
    """
    data_source = current_app.config["SERVICE_DATA_SOURCE"]
    factory = DATA_SOURCE_BACKENDS.get(data_source)
    if not factory:
        return None
    return factory()
class Backend:
    """Object-storage backend built on a boto3 S3 resource and a single bucket.

    Connection settings are read from the current Flask app config
    (``DRS_S3_API_URL``, ``DRS_S3_ACCESS_KEY``, ``DRS_S3_SECRET_KEY``,
    ``DRS_S3_BUCKET``), so instances must be created inside an app context.
    """

    def __init__(self, resource=None):
        """Set up the S3 resource and bucket handle.

        Args:
            resource: Optional pre-built boto3 S3 resource. When omitted (or
                falsy), a new one is created from the app config.
        """
        self._client = resource or boto3.resource(
            "s3",
            endpoint_url=current_app.config["DRS_S3_API_URL"],
            aws_access_key_id=current_app.config["DRS_S3_ACCESS_KEY"],
            aws_secret_access_key=current_app.config["DRS_S3_SECRET_KEY"],
        )

        self.bucket = self._client.Bucket(current_app.config["DRS_S3_BUCKET"])

    @staticmethod
    def build_minio_location(obj):
        """Return the ``s3://<host>/<bucket>/<key>`` location string for *obj*.

        Args:
            obj: An object exposing ``bucket_name`` and ``key`` attributes.
        """
        # Derive the host from DRS_S3_API_URL, the same endpoint __init__
        # connects to; the previous "MINIO_URL" key is not one of the
        # DRS_S3_* settings this backend is configured with.
        host = urlparse(current_app.config["DRS_S3_API_URL"]).netloc
        return f"s3://{host}/{obj.bucket_name}/{obj.key}"

    def get_minio_object(self, location: str):
        """Fetch the stored object referenced by *location*.

        Only the final ``/``-separated segment of *location* is used as the
        object key.

        Returns:
            The result of calling ``get()`` on the bucket object.
        """
        key = location.split("/")[-1]
        return self.bucket.Object(key).get()

    def save(self, current_location: str, filename: str) -> str:
        """Upload a local file to the bucket and return its location string.

        Args:
            current_location: Path of the local file to upload.
            filename: Key under which the file is stored in the bucket.

        Returns:
            The location string built by ``build_minio_location`` for the
            uploaded object.
        """
        with open(current_location, "rb") as f:
            obj = self.bucket.put_object(Key=filename, Body=f)
        return self.build_minio_location(obj)


def get_backend() -> Backend:
    """Return the storage backend for the current Flask application context.

    The backend is created on first use and stored on ``g``, so later calls
    within the same context reuse that instance.
    """
    if "backend" not in g:
        g.backend = Backend()
    return g.backend


Expand Down
Empty file removed chord_drs/backends/__init__.py
Empty file.
18 changes: 0 additions & 18 deletions chord_drs/backends/base.py

This file was deleted.

25 changes: 0 additions & 25 deletions chord_drs/backends/local.py

This file was deleted.

35 changes: 0 additions & 35 deletions chord_drs/backends/minio.py

This file was deleted.

44 changes: 17 additions & 27 deletions chord_drs/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from dotenv import load_dotenv

from .constants import SERVICE_NAME, SERVICE_TYPE
from .data_sources import DATA_SOURCE_LOCAL, DATA_SOURCE_MINIO
from .logger import logger


Expand Down Expand Up @@ -41,26 +40,21 @@ def _get_from_environ_or_fail(var: str) -> str:
AUTHZ_ENABLED = os.environ.get("AUTHZ_ENABLED", "true").strip().lower() in TRUTH_VALUES
AUTHZ_URL: str = _get_from_environ_or_fail("BENTO_AUTHZ_SERVICE_URL").strip().rstrip("/") if AUTHZ_ENABLED else ""

# MinIO-related, check if the credentials have been provided in a file
MINIO_URL = os.environ.get("MINIO_URL")
MINIO_ACCESS_KEY_FILE = os.environ.get("MINIO_ACCESS_KEY_FILE")
MINIO_SECRET_KEY_FILE = os.environ.get("MINIO_ACCESS_KEY_FILE")
# S3 backend-related, check if the credentials have been provided in a file
DRS_S3_API_URL = os.environ.get("DRS_S3_API_URL")

MINIO_USERNAME = os.environ.get("MINIO_USERNAME")
MINIO_PASSWORD = os.environ.get("MINIO_PASSWORD")
DRS_S3_ACCESS_KEY = os.environ.get("DRS_S3_ACCESS_KEY")
DRS_S3_SECRET_KEY = os.environ.get("DRS_S3_SECRET_KEY")

if MINIO_SECRET_KEY_FILE:
MINIO_ACCESS_KEY_PATH = Path(MINIO_ACCESS_KEY_FILE).resolve()
if DRS_S3_ACCESS_KEY_FILE := os.environ.get("DRS_S3_ACCESS_KEY_FILE"):
if (kp := Path(DRS_S3_ACCESS_KEY_FILE).resolve()).exists():
with open(kp, "r") as f:
DRS_S3_ACCESS_KEY = f.read().strip()

if MINIO_ACCESS_KEY_PATH.exists():
with open(MINIO_ACCESS_KEY_PATH, "r") as f:
MINIO_USERNAME = f.read().strip()

if MINIO_SECRET_KEY_FILE:
MINIO_SECRET_KEY_PATH = Path(MINIO_SECRET_KEY_FILE).resolve()
if MINIO_SECRET_KEY_PATH.exists():
with open(MINIO_SECRET_KEY_PATH, "r") as f:
MINIO_PASSWORD = f.read().strip()
if DRS_S3_SECRET_KEY_FILE := os.environ.get("DRS_S3_SECRET_KEY_FILE"):
if (kp := Path(DRS_S3_SECRET_KEY_FILE).resolve()).exists():
with open(kp, "r") as f:
DRS_S3_SECRET_KEY = f.read().strip()


class Config:
Expand All @@ -70,14 +64,12 @@ class Config:
PROMETHEUS_ENABLED: bool = os.environ.get("PROMETHEUS_ENABLED", "false").strip().lower() in TRUTH_VALUES

SERVICE_ID: str = os.environ.get("SERVICE_ID", ":".join(list(SERVICE_TYPE.values())[:2]))
SERVICE_DATA_SOURCE: str = DATA_SOURCE_MINIO if MINIO_URL else DATA_SOURCE_LOCAL
SERVICE_DATA: str | None = None if MINIO_URL else SERVICE_DATA
SERVICE_BASE_URL: str = os.environ.get("SERVICE_BASE_URL", "http://127.0.0.1").strip().rstrip("/")

MINIO_URL: str | None = MINIO_URL
MINIO_USERNAME: str | None = MINIO_USERNAME
MINIO_PASSWORD: str | None = MINIO_PASSWORD
MINIO_BUCKET: str | None = os.environ.get("MINIO_BUCKET") if MINIO_URL else None
DRS_S3_API_URL: str | None = DRS_S3_API_URL
DRS_S3_ACCESS_KEY: str | None = DRS_S3_ACCESS_KEY
DRS_S3_SECRET_KEY: str | None = DRS_S3_SECRET_KEY
DRS_S3_BUCKET: str | None = os.environ.get("DRS_S3_BUCKET")
BENTO_DEBUG = os.environ.get("BENTO_DEBUG", os.environ.get("FLASK_DEBUG", "false")).strip().lower() in TRUTH_VALUES

# CORS
Expand All @@ -89,6 +81,4 @@ class Config:


print(f"[{SERVICE_NAME}] Using: database URI {Config.SQLALCHEMY_DATABASE_URI}")
print(f"[{SERVICE_NAME}] data source {Config.SERVICE_DATA_SOURCE}")
print(f"[{SERVICE_NAME}] data path {Config.SERVICE_DATA}")
print(f"[{SERVICE_NAME}] minio URL {Config.MINIO_URL}", flush=True)
print(f"[{SERVICE_NAME}] s3 URL {Config.DRS_S3_API_URL}", flush=True)
21 changes: 0 additions & 21 deletions chord_drs/data_sources.py

This file was deleted.

10 changes: 1 addition & 9 deletions chord_drs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from uuid import uuid4

from .backend import get_backend
from .backends.minio import MinioBackend
from .db import db
from .utils import drs_file_checksum

Expand Down Expand Up @@ -93,8 +92,6 @@ def __init__(self, *args, **kwargs):

backend = get_backend()

if not backend:
raise Exception("The backend for this instance is not properly configured.")
try:
self.location = backend.save(location, new_filename)
self.size = os.path.getsize(p)
Expand All @@ -115,9 +112,4 @@ def return_minio_object(self):
if parsed_url.scheme != "s3":
return None

backend = get_backend()

if not backend or not isinstance(backend, MinioBackend):
raise Exception("The backend for this instance is not properly configured.")

return backend.get_minio_object(self.location)
return get_backend().get_minio_object(self.location)
1 change: 0 additions & 1 deletion chord_drs/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from . import __version__
from .authz import authz_middleware, PERMISSION_INGEST_DATA, PERMISSION_QUERY_DATA, PERMISSION_DOWNLOAD_DATA
from .constants import BENTO_SERVICE_KIND, SERVICE_NAME, SERVICE_TYPE
from .data_sources import DATA_SOURCE_LOCAL, DATA_SOURCE_MINIO
from .db import db
from .models import DrsBlob, DrsBundle
from .types import DRSAccessMethodDict, DRSContentsDict, DRSObjectDict
Expand Down