Skip to content

Commit

Permalink
feat: add download-artifact POST endpoint for run file outputs
Browse files Browse the repository at this point in the history
  • Loading branch information
davidlougheed committed Mar 11, 2024
1 parent 9a0983f commit ce8383f
Showing 1 changed file with 75 additions and 14 deletions.
89 changes: 75 additions & 14 deletions bento_wes/runs.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import itertools
import json
import os
import sqlite3
import pydantic
import requests
import shutil
import traceback
import urllib.parse
import uuid

from bento_lib.auth.permissions import P_INGEST_DATA, P_VIEW_RUNS
Expand All @@ -17,9 +19,9 @@
flask_not_found_error,
flask_forbidden_error,
)
from flask import Blueprint, Response, current_app, jsonify, request
from flask import Blueprint, Request, Response, current_app, jsonify, request
from pathlib import Path
from typing import Callable, Iterator
from typing import Any, Callable, Iterator
from werkzeug.utils import secure_filename

from . import states
Expand Down Expand Up @@ -47,6 +49,8 @@
parse_workflow_host_allow_list,
)

# MIME type set on streamed artifact download responses (generic binary content)
MIME_OCTET_STREAM = "application/octet-stream"
# Number of bytes read from an artifact file per streamed chunk
CHUNK_SIZE = 1024 * 16 # Read 16 KB at a time

bp_runs = Blueprint("runs", __name__)

Expand Down Expand Up @@ -85,10 +89,19 @@ def _check_runs_permission(run_requests: list[RunRequest], permission: str) -> I
))


def _check_single_run_permission_and_mark(run_req: RunRequest, permission: str) -> bool:
def _post_headers_getter(r: Request) -> dict[str, str]:
token = r.form.get("token")
return {"Authorization": f"Bearer {token}"} if token else {}


def _check_single_run_permission_and_mark(run_req: RunRequest, permission: str, form_mode: bool = False) -> bool:
    """
    Evaluate whether the current request holds a permission on the resource of a single run request, marking
    authorization as done for the request in the process.

    :param run_req: The run request whose resource the permission is evaluated against.
    :param permission: The permission to evaluate.
    :param form_mode: If true, authorization headers are derived from the request's form data (via
                      _post_headers_getter) rather than using the middleware's default header handling.
    :return: The result of the permission evaluation, or True if authorization is disabled.
    """

    if not authz_enabled():
        # Nothing to evaluate when authorization is turned off
        return True

    # By calling this, the developer indicates that they will have handled permissions adequately:
    return authz_middleware.evaluate_one(
        request,
        _get_resource_for_run_request(run_req),
        permission,
        headers_getter=_post_headers_getter if form_mode else None,
        mark_authz_done=True,
    )


Expand Down Expand Up @@ -335,22 +348,75 @@ def run_list():
return jsonify(res_list)


def _run_none_response(run_id: uuid.UUID):
    """
    Build the error response used when a run lookup yields nothing.

    :param run_id: ID of the run which could not be found; only used in the not-found message.
    :return: A forbidden error response if authorization is enabled (after marking authorization as done for the
             current request); otherwise, a not-found error response.
    """

    if not authz_enabled():
        return flask_not_found_error(f"Run {run_id} not found")

    # Without the required permissions, don't even leak if this run exists - just return forbidden
    authz_middleware.mark_authz_done(request)
    return flask_forbidden_error("Forbidden")


@bp_runs.route("/runs/<uuid:run_id>", methods=["GET"])
def run_detail(run_id: uuid.UUID):
    """
    Return the details of a single run as JSON.

    :param run_id: ID of the run to look up.
    :return: A JSON response with the dumped run details; the shared "no such run" response if the lookup
             yields nothing; or a forbidden error if the P_VIEW_RUNS check on the run's request fails.
    """

    run_details = get_run_with_details(get_db().cursor(), run_id, stream_content=False)

    if run_details is None:
        # Use the shared helper so this endpoint responds the same way as the other run endpoints; when
        # authorization is enabled, the helper also marks authorization as done before returning forbidden.
        return _run_none_response(run_id)

    if not _check_single_run_permission_and_mark(run_details.request, P_VIEW_RUNS):
        return flask_forbidden_error("Forbidden")

    return jsonify(run_details.model_dump(mode="json"))


def _denest_list(x: Any) -> list:
if isinstance(x, list):
return list(itertools.chain.from_iterable(map(_denest_list, x)))
return [x]


@bp_runs.route("/runs/<uuid:run_id>/download-artifact", methods=["POST"])
def run_download_artifact(run_id: uuid.UUID):
    """
    Stream one of a run's file outputs back to the client as an attachment.

    The artifact to download is given by the `path` form field. Permissions are checked in form mode, i.e., the
    authorization headers are derived from the submitted form rather than from the request headers.

    :param run_id: ID of the run which produced the artifact.
    :return: A streamed application/octet-stream response on success. Otherwise: the shared "no such run"
             response; forbidden if the P_VIEW_RUNS check fails; bad request if `path` is blank or missing;
             not found if `path` is not one of the run's file outputs; or an internal server error if the path
             is a known output but is missing from the filesystem.
    """

    run_details = get_run_with_details(get_db().cursor(), run_id, stream_content=False)

    if run_details is None:
        return _run_none_response(run_id)

    if not _check_single_run_permission_and_mark(run_details.request, P_VIEW_RUNS, form_mode=True):
        return flask_forbidden_error("Forbidden")

    artifact_path = request.form.get("path")
    if not artifact_path:
        return flask_bad_request_error("Requested artifact path is blank or unspecified")

    # Collect file artifacts. Only paths recorded as outputs of this run may be downloaded, so the
    # user-supplied path is never opened unless it exactly matches one of them.
    artifacts: set[str] = set()
    for o in run_details.outputs.values():
        if "File" in o.type:
            # Output values may be nested lists of paths; flatten before collecting
            artifacts.update(_denest_list(o.value))

    if artifact_path not in artifacts:
        return flask_not_found_error(f"Requested artifact path not found in run {run_id}")

    p = Path(artifact_path)

    if not p.exists():
        return flask_internal_server_error(f"Artifact path does not exist on filesystem: {artifact_path}")

    def generate_bytes():
        # Stream in fixed-size chunks; an empty read (EOF) ends the loop
        with open(p, "rb") as fh:
            while data := fh.read(CHUNK_SIZE):
                yield data

    r = current_app.response_class(generate_bytes(), status=200, mimetype=MIME_OCTET_STREAM)
    r.headers["Content-Length"] = p.stat().st_size
    # Extended parameter syntax is charset'language'value; the language tag is left empty, so the two
    # single quotes must be adjacent.
    encoded_name = urllib.parse.quote(p.name, encoding="utf-8")
    r.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{encoded_name}"
    return r


def get_stream(c: sqlite3.Cursor, stream: RunStream, run_id: uuid.UUID):
run = get_run_with_details(c, run_id, stream_content=True)
return (current_app.response_class(
Expand All @@ -377,12 +443,7 @@ def check_run_authz_then_return_response(
run = get_run_with_details(c, run_id, stream_content=False)

if run is None:
if authz_enabled():
# Without the required permissions, don't even leak if this run exists - just return forbidden
authz_middleware.mark_authz_done(request)
return flask_forbidden_error("Forbidden")
else:
return flask_not_found_error(f"Run {run_id} not found")
return _run_none_response(run_id)

if not _check_single_run_permission_and_mark(run.request, permission):
return flask_forbidden_error("Forbidden")
Expand Down

0 comments on commit ce8383f

Please sign in to comment.