diff --git a/bento_wes/runs.py b/bento_wes/runs.py
index d99280f..43180f4 100644
--- a/bento_wes/runs.py
+++ b/bento_wes/runs.py
@@ -1,3 +1,4 @@
+import itertools
 import json
 import os
 import sqlite3
@@ -5,6 +6,7 @@
 import requests
 import shutil
 import traceback
+import urllib.parse
 import uuid
 
 from bento_lib.auth.permissions import P_INGEST_DATA, P_VIEW_RUNS
@@ -17,9 +19,9 @@
     flask_not_found_error,
     flask_forbidden_error,
 )
-from flask import Blueprint, Response, current_app, jsonify, request
+from flask import Blueprint, Request, Response, current_app, jsonify, request
 from pathlib import Path
-from typing import Callable, Iterator
+from typing import Any, Callable, Iterator
 from werkzeug.utils import secure_filename
 
 from . import states
@@ -47,6 +49,8 @@
     parse_workflow_host_allow_list,
 )
 
+MIME_OCTET_STREAM = "application/octet-stream"
+CHUNK_SIZE = 1024 * 16  # Read 16 KB at a time
 
 bp_runs = Blueprint("runs", __name__)
 
@@ -85,10 +89,19 @@ def _check_runs_permission(run_requests: list[RunRequest], permission: str) -> I
     ))
 
 
-def _check_single_run_permission_and_mark(run_req: RunRequest, permission: str) -> bool:
+def _post_headers_getter(r: Request) -> dict[str, str]:
+    token = r.form.get("token")
+    return {"Authorization": f"Bearer {token}"} if token else {}
+
+
+def _check_single_run_permission_and_mark(run_req: RunRequest, permission: str, form_mode: bool = False) -> bool:
     # By calling this, the developer indicates that they will have handled permissions adequately:
     return authz_middleware.evaluate_one(
-        request, _get_resource_for_run_request(run_req), permission, mark_authz_done=True
+        request,
+        _get_resource_for_run_request(run_req),
+        permission,
+        headers_getter=_post_headers_getter if form_mode else None,
+        mark_authz_done=True,
     ) if authz_enabled() else True
 
 
@@ -335,15 +348,20 @@ def run_list():
     return jsonify(res_list)
 
 
+def _run_none_response(run_id: uuid.UUID):
+    if authz_enabled():
+        # Without the required permissions, don't even leak if this run exists - just return forbidden
+        authz_middleware.mark_authz_done(request)
+        return flask_forbidden_error("Forbidden")
+    return flask_not_found_error(f"Run {str(run_id)} not found")
+
+
 @bp_runs.route("/runs/<uuid:run_id>", methods=["GET"])
 def run_detail(run_id: uuid.UUID):
     run_details = get_run_with_details(get_db().cursor(), run_id, stream_content=False)
 
     if run_details is None:
-        if authz_enabled():
-            return flask_forbidden_error("Forbidden")
-        else:
-            return flask_not_found_error(f"Run {run_id} not found")
+        return _run_none_response(run_id)
 
     if not _check_single_run_permission_and_mark(run_details.request, P_VIEW_RUNS):
         return flask_forbidden_error("Forbidden")
@@ -351,6 +369,56 @@ def run_detail(run_id: uuid.UUID):
     return jsonify(run_details.model_dump(mode="json"))
 
 
+def _denest_list(x: Any) -> list:
+    """Recursively flatten nested lists; a non-list value becomes a one-element list."""
+    if isinstance(x, list):
+        return list(itertools.chain.from_iterable(map(_denest_list, x)))
+    return [x]
+
+
+@bp_runs.route("/runs/<uuid:run_id>/download-artifact", methods=["POST"])
+def run_download_artifact(run_id: uuid.UUID):
+    """Stream a file output of a run as an attachment; path (and token, for authz) are read from the form."""
+    run_details = get_run_with_details(get_db().cursor(), run_id, stream_content=False)
+
+    if run_details is None:
+        return _run_none_response(run_id)
+
+    if not _check_single_run_permission_and_mark(run_details.request, P_VIEW_RUNS, form_mode=True):
+        return flask_forbidden_error("Forbidden")
+
+    artifact_path = request.form.get("path")
+    if not artifact_path:
+        return flask_bad_request_error("Requested artifact path is blank or unspecified")
+
+    # Collect file artifacts
+    artifacts: set[str] = set()
+    for o in run_details.outputs.values():
+        if "File" in o.type:
+            dn: set[str] = set(_denest_list(o.value))
+            artifacts.update(dn)
+
+    if artifact_path not in artifacts:
+        return flask_not_found_error(f"Requested artifact path not found in run {run_id}")
+
+    p = Path(artifact_path)
+
+    if not p.exists():
+        return flask_internal_server_error(f"Artifact path does not exist on filesystem: {artifact_path}")
+
+    def generate_bytes():
+        with open(p, "rb") as fh:
+            # read() returns b"" at EOF, which is falsy and ends the loop - no explicit break needed
+            while data := fh.read(CHUNK_SIZE):
+                yield data
+
+    r = current_app.response_class(generate_bytes(), status=200, mimetype=MIME_OCTET_STREAM)
+    r.headers["Content-Length"] = p.stat().st_size
+    # RFC 5987 ext-value is charset'language'value; the language is left empty, hence two adjacent quotes
+    r.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{urllib.parse.quote(p.name, encoding='utf-8')}"
+    return r
+
+
 def get_stream(c: sqlite3.Cursor, stream: RunStream, run_id: uuid.UUID):
     run = get_run_with_details(c, run_id, stream_content=True)
     return (current_app.response_class(
@@ -377,12 +445,7 @@ def check_run_authz_then_return_response(
     run = get_run_with_details(c, run_id, stream_content=False)
 
     if run is None:
-        if authz_enabled():
-            # Without the required permissions, don't even leak if this run exists - just return forbidden
-            authz_middleware.mark_authz_done(request)
-            return flask_forbidden_error("Forbidden")
-        else:
-            return flask_not_found_error(f"Run {run_id} not found")
+        return _run_none_response(run_id)
 
     if not _check_single_run_permission_and_mark(run.request, permission):
         return flask_forbidden_error("Forbidden")