From 8c31b431d96cfb158390bd035c07d216db97a279 Mon Sep 17 00:00:00 2001
From: Rusty Davis
Date: Mon, 26 Feb 2024 16:20:35 -0700
Subject: [PATCH] Add pre/post script support

---
 beeflow/common/parser/parser.py               |  5 +-
 beeflow/common/worker/slurm_worker.py         | 17 +++-
 .../bee_workflows/clamr-wf_script/README.md   | 10 +++
 .../bee_workflows/clamr-wf_script/clamr.cwl   | 83 +++++++++++++++++++
 .../clamr-wf_script/clamr_job.json            | 13 +++
 .../clamr-wf_script/clamr_job.yml             | 19 +++++
 .../clamr-wf_script/clamr_wf.cwl              | 82 ++++++++++++++++++
 .../bee_workflows/clamr-wf_script/ffmpeg.cwl  | 49 +++++++++++
 .../bee_workflows/clamr-wf_script/post_run.sh |  1 +
 .../bee_workflows/clamr-wf_script/pre_run.sh  |  1 +
 10 files changed, 278 insertions(+), 2 deletions(-)
 create mode 100644 beeflow/data/cwl/bee_workflows/clamr-wf_script/README.md
 create mode 100644 beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr.cwl
 create mode 100644 beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr_job.json
 create mode 100644 beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr_job.yml
 create mode 100644 beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr_wf.cwl
 create mode 100644 beeflow/data/cwl/bee_workflows/clamr-wf_script/ffmpeg.cwl
 create mode 100644 beeflow/data/cwl/bee_workflows/clamr-wf_script/post_run.sh
 create mode 100644 beeflow/data/cwl/bee_workflows/clamr-wf_script/pre_run.sh

diff --git a/beeflow/common/parser/parser.py b/beeflow/common/parser/parser.py
index c6e845f66..40bca23c6 100644
--- a/beeflow/common/parser/parser.py
+++ b/beeflow/common/parser/parser.py
@@ -24,7 +24,6 @@
                                     Requirement, generate_workflow_id)
 
 
-
 # Map CWL types to Python types
 type_map = {
     "string": str,
@@ -317,6 +316,10 @@ def parse_requirements(self, requirements, as_hints=False):
                 # Load in the dockerfile at parse time
                 if 'dockerFile' in items:
                     self._read_requirement_file('dockerFile', items)
+                if 'pre_script_path' in items and items.get('enabled'):
+                    self._read_requirement_file('pre_script_path', items)
+                if 'post_script_path' in items and items.get('enabled'):
+                    self._read_requirement_file('post_script_path', items)
                 if 'beeflow:bindMounts' in items:
                     self._read_requirement_file('beeflow:bindMounts', items)
                 reqs.append(Hint(req['class'], items))
diff --git a/beeflow/common/worker/slurm_worker.py b/beeflow/common/worker/slurm_worker.py
index de999fd72..2a824909f 100644
--- a/beeflow/common/worker/slurm_worker.py
+++ b/beeflow/common/worker/slurm_worker.py
@@ -9,6 +9,7 @@
 import getpass
 import requests_unixsocket
 import requests
+import io
 
 from beeflow.common import log as bee_logging
 from beeflow.common.worker.worker import (Worker, WorkerError)
@@ -53,6 +54,12 @@ def build_text(self, task):
                                          'partition',
                                          default=self.default_partition)
 
+        scripts_enabled = task.get_requirement('beeflow:ScriptRequirement', 'enabled')
+        # StringIO.readlines splits the script into lines; newlines are stripped on append
+        pre_script = io.StringIO(task.get_requirement('beeflow:ScriptRequirement',
+                                                      'pre_script_path')).readlines()
+        post_script = io.StringIO(task.get_requirement('beeflow:ScriptRequirement',
+                                                       'post_script_path')).readlines()
         # sbatch header
         script = [
             '#!/bin/bash',
@@ -83,18 +90,26 @@ def srun(script_lines, script_cmd):
             script_lines.append(f'srun {cmd_args}')
 
         # Pre commands
+        if scripts_enabled:
+            for cmd in pre_script:
+                script.append(cmd.rstrip('\n'))
+
         for cmd in crt_res.pre_commands:
             srun(script, cmd)
 
         # Main command
         srun_args = ' '.join(main_command_srun_args)
-        print(crt_res.main_command)
         args = ' '.join(crt_res.main_command.args)
         script.append(f'srun --mpi={mpi_version} {srun_args} {args}')
 
+
         # Post commands
         for cmd in crt_res.post_commands:
             srun(script, cmd)
+        if scripts_enabled:
+            for cmd in post_script:
+                script.append(cmd.rstrip('\n'))
+
         return '\n'.join(script)
 
     def write_script(self, task):
diff --git a/beeflow/data/cwl/bee_workflows/clamr-wf_script/README.md b/beeflow/data/cwl/bee_workflows/clamr-wf_script/README.md
new file mode 100644
index 000000000..8cc2e9873
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/clamr-wf_script/README.md
@@ -0,0 +1,10 @@
+# CLAMR - FFMPEG workflow using CWL
+
+clamr_wf.cwl - the main cwl.
+clamr_job.yml - yaml file for values used by the cwl files.
+clamr.cwl - cwl file for the clamr step.
+ffmpeg.cwl - cwl file for the ffmpeg step.
+
+The values in these files run on Fog, a LANL cluster, using the container runtime Charliecloud. Fog uses Slurm as the workload scheduler.
+
+
diff --git a/beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr.cwl b/beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr.cwl
new file mode 100644
index 000000000..8b8f40465
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr.cwl
@@ -0,0 +1,83 @@
+# -*- mode: YAML; -*-
+
+class: CommandLineTool
+cwlVersion: v1.0
+
+baseCommand: /clamr/CLAMR-master/clamr_cpuonly
+# This is the stdout field which makes all stdout be captured in this file
+# stderr is not currently implemented but it is also a thing
+stdout: clamr_stdout.txt
+# Arguments to the command
+inputs:
+  amr_type:
+    # ? means the argument is optional
+    # All of the ? here are legacy from the original CWL
+    type: string?
+    # Declare extra options
+    # We support prefix and position
+    inputBinding:
+      # Prefix is the flag for cli command
+      prefix: -A
+  grid_res:
+    type: int?
+    inputBinding:
+      prefix: -n
+  max_levels:
+    type: int?
+    inputBinding:
+      prefix: -l
+  time_steps:
+    type: int?
+    inputBinding:
+      prefix: -t
+  output_steps:
+    type: int?
+    inputBinding:
+      prefix: -i
+  graphic_steps:
+    type: int?
+    inputBinding:
+      prefix: -g
+  graphics_type:
+    type: string?
+    inputBinding:
+      prefix: -G
+  rollback_images:
+    type: int?
+    inputBinding:
+      prefix: -b
+  checkpoint_disk_interval:
+    type: int?
+    inputBinding:
+      prefix: -c
+  checkpoint_mem_interval:
+    type: int?
+    inputBinding:
+      prefix: -C
+  hash_method:
+    type: string?
+    inputBinding:
+      prefix: -e
+
+outputs:
+  # Captures stdout. Name is arbitrary.
+  clamr_stdout:
+    # type is syntactic sugar to just grab the output file defined above
+    # stdout:
+    #   type: File
+    #   outputBinding:
+    #     glob: clamr_stdout.txt
+    # stdout is easy shorthand
+    type: stdout
+  outdir:
+    # directory is just another type. Scan the files for a directory with the name specified in glob
+    # If you add a wildcard, it'd do expansion
+    type: Directory
+    outputBinding:
+      # Glob can be either a constant string or have a wildcard
+      # TODO verify CWLs glob support
+      glob: ./graphics_output/graph%05d.png
+  time_log:
+    type: File
+    outputBinding:
+      glob: total_execution_time.log
diff --git a/beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr_job.json b/beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr_job.json
new file mode 100644
index 000000000..ad88972f6
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr_job.json
@@ -0,0 +1,13 @@
+{
+    "grid_resolution": 32,
+    "max_levels": 3,
+    "time_steps": 5000,
+    "steps_between_outputs": 10,
+    "steps_between_graphics": 25,
+    "graphics_type": "png",
+    "input_format": "image2",
+    "frame_rate": 12,
+    "frame_size": "800x800",
+    "pixel_format": "yuv420p",
+    "output_filename": "CLAMR_movie.mp4"
+}
diff --git a/beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr_job.yml b/beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr_job.yml
new file mode 100644
index 000000000..8fee23c0d
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr_job.yml
@@ -0,0 +1,19 @@
+# Inputs for CLAMR
+# /clamr/CLAMR-master/clamr_cpuonly -n 32 -l 3 -t 5000 -i 10 -g 25 -G png
+
+grid_resolution: 32
+max_levels: 3
+time_steps: 5000
+steps_between_outputs: 10
+steps_between_graphics: 25
+graphics_type: png
+
+# Inputs for FFMPEG
+#ffmpeg -f image2 -r 12 -s 800x800 -pix_fmt yuv420p CLAMR_movie.mp4
+
+input_format: image2
+frame_rate: 12
+frame_size: 800x800
+pixel_format: yuv420p
+# output_filename: CLAMR_movie.mp4
+output_filename: ./CLAMR_movie.mp4
diff --git a/beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr_wf.cwl b/beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr_wf.cwl
new file mode 100644
index 000000000..dc8ef966e
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr_wf.cwl
@@ -0,0 +1,82 @@
+# -*- mode: YAML; -*-
+
+class: Workflow
+cwlVersion: v1.0
+
+# Main 3 components of workflow are inputs, outputs, and steps
+
+inputs:
+# All inputs go here for each step. No way to break them up.
+# We should talk to the CWL people about that.
+##### CLAMR inputs #####
+# takes ID:Type syntax
+  grid_resolution: int
+  max_levels: int
+  time_steps: int
+  steps_between_outputs: int
+  steps_between_graphics: int
+  graphics_type: string
+##### FFMPEG inputs #####
+  input_format: string
+  frame_rate: int
+  frame_size: string
+  pixel_format: string
+  output_filename: string
+
+outputs:
+# Outputs for all the steps
+# Check where we copy these probably at CWD
+# Files need to exist at end of workflow, but we aren't currently checking
+# We have function to get workflow_outputs
+# TODO add step to workflow_manager to confirm that each of these outputs exist
+  clamr_stdout:
+    type: File
+    outputSource: clamr/clamr_stdout
+  clamr_time_log:
+    type: File
+    outputSource: clamr/time_log
+  clamr_movie:
+    type: File
+    outputSource: ffmpeg/movie
+  ffmpeg_stderr:
+    type: File
+    outputSource: ffmpeg/ffmpeg_stderr
+
+steps:
+  clamr:
+    run: clamr.cwl
+    in:
+      grid_res: grid_resolution
+      max_levels: max_levels
+      time_steps: time_steps
+      output_steps: steps_between_outputs
+      graphic_steps: steps_between_graphics
+      graphics_type: graphics_type
+    out: [clamr_stdout, outdir, time_log]
+    hints:
+      beeflow:ScriptRequirement:
+        enabled: true
+        pre_script_path: "pre_run.sh"
+        post_script_path: "post_run.sh"
+      DockerRequirement:
+        # TODO Sort this out
+        #dockerImport: clamr_img.tar.gz
+        #beeflow:copyContainer: clamr
+        beeflow:copyContainer: "/usr/projects/beedev/clamr/clamr-toss.tar.gz"
+
+  ffmpeg:
+    run: ffmpeg.cwl
+    in:
+      input_format: input_format
+      # input syntax is name: /dependent_object
+      ffmpeg_input: clamr/outdir
+      frame_rate: frame_rate
+      frame_size: frame_size
+      pixel_format: pixel_format
+      # Setting output file with file_name
+      # output_filename set in wf inputs
+      output_file: output_filename
+    # Multiple outputs can be in array
+    out: [movie, ffmpeg_stderr]
+    requirements:
+      InlineJavascriptRequirement: {}
diff --git a/beeflow/data/cwl/bee_workflows/clamr-wf_script/ffmpeg.cwl b/beeflow/data/cwl/bee_workflows/clamr-wf_script/ffmpeg.cwl
new file mode 100644
index 000000000..bd9f27513
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/clamr-wf_script/ffmpeg.cwl
@@ -0,0 +1,49 @@
+# -*- mode: YAML; -*-
+
+class: CommandLineTool
+cwlVersion: v1.0
+
+baseCommand: ffmpeg -y
+
+stderr: ffmpeg_stderr.txt
+
+inputs:
+  input_format:
+    type: string?
+    inputBinding:
+      prefix: -f
+      position: 1
+  ffmpeg_input:
+    type: Directory
+    inputBinding:
+      prefix: -i
+      position: 2
+      valueFrom: $("/graph%05d.png")
+  frame_rate:
+    type: int?
+    inputBinding:
+      prefix: -r
+      position: 3
+  frame_size:
+    type: string?
+    inputBinding:
+      prefix: -s
+      position: 4
+  pixel_format:
+    type: string?
+    inputBinding:
+      prefix: -pix_fmt
+      position: 5
+  output_file:
+    type: string
+    inputBinding:
+      position: 6
+
+outputs:
+  movie:
+    type: File
+    outputBinding:
+      glob: $(inputs.output_file)
+      # glob: CLAMR_movie.mp4
+  ffmpeg_stderr:
+    type: stderr
diff --git a/beeflow/data/cwl/bee_workflows/clamr-wf_script/post_run.sh b/beeflow/data/cwl/bee_workflows/clamr-wf_script/post_run.sh
new file mode 100644
index 000000000..e4eb7f6c9
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/clamr-wf_script/post_run.sh
@@ -0,0 +1 @@
+echo "After run"
diff --git a/beeflow/data/cwl/bee_workflows/clamr-wf_script/pre_run.sh b/beeflow/data/cwl/bee_workflows/clamr-wf_script/pre_run.sh
new file mode 100644
index 000000000..dd7e00efc
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/clamr-wf_script/pre_run.sh
@@ -0,0 +1 @@
+echo "Before run"