Skip to content

Commit

Permalink
Add pre/post script support
Browse files Browse the repository at this point in the history
  • Loading branch information
rstyd committed Feb 26, 2024
1 parent d76b04f commit 8c31b43
Show file tree
Hide file tree
Showing 10 changed files with 278 additions and 2 deletions.
5 changes: 4 additions & 1 deletion beeflow/common/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
Requirement,
generate_workflow_id)


# Map CWL types to Python types
type_map = {
"string": str,
Expand Down Expand Up @@ -317,6 +316,10 @@ def parse_requirements(self, requirements, as_hints=False):
# Load in the dockerfile at parse time
if 'dockerFile' in items:
self._read_requirement_file('dockerFile', items)
if 'pre_script_path' in items and items['enabled']:
self._read_requirement_file('pre_script_path', items)
if 'post_script_path' in items and items['enabled']:
self._read_requirement_file('post_script_path', items)
if 'beeflow:bindMounts' in items:
self._read_requirement_file('beeflow:bindMounts', items)
reqs.append(Hint(req['class'], items))
Expand Down
17 changes: 16 additions & 1 deletion beeflow/common/worker/slurm_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import getpass
import requests_unixsocket
import requests
import io

from beeflow.common import log as bee_logging
from beeflow.common.worker.worker import (Worker, WorkerError)
Expand Down Expand Up @@ -53,6 +54,12 @@ def build_text(self, task):
'partition',
default=self.default_partition)

scripts_enabled = task.get_requirement('beeflow:ScriptRequirement', 'enabled')
# We use StringIO here to properly break the script up into lines with readlines
pre_script = io.StringIO(task.get_requirement('beeflow:ScriptRequirement',
'pre_script_path')).readlines()
post_script = io.StringIO(task.get_requirement('beeflow:ScriptRequirement',
'post_script_path')).readlines()
# sbatch header
script = [
'#!/bin/bash',
Expand Down Expand Up @@ -83,18 +90,26 @@ def srun(script_lines, script_cmd):
script_lines.append(f'srun {cmd_args}')

# Pre commands
if scripts_enabled:
for cmd in pre_script:
script.append(cmd)

for cmd in crt_res.pre_commands:
srun(script, cmd)

# Main command
srun_args = ' '.join(main_command_srun_args)
print(crt_res.main_command)
args = ' '.join(crt_res.main_command.args)
script.append(f'srun --mpi={mpi_version} {srun_args} {args}')

# Post commands
for cmd in crt_res.post_commands:
srun(script, cmd)

if scripts_enabled:
for cmd in post_script:
script.append(cmd)

return '\n'.join(script)

def write_script(self, task):
Expand Down
10 changes: 10 additions & 0 deletions beeflow/data/cwl/bee_workflows/clamr-wf_script/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# CLAMR - FFMPEG workflow using CWL

clamr_wf.cwl - the main CWL workflow file.
clamr_job.yml - YAML file with the input values used by the CWL files.
clamr.cwl - CWL file for the clamr step.
ffmpeg.cwl - CWL file for the ffmpeg step.

The values in these files are set up to run on Fog, a LANL cluster, using the Charliecloud container runtime. Fog uses Slurm as its workload scheduler.


83 changes: 83 additions & 0 deletions beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# -*- mode: YAML; -*-

class: CommandLineTool
cwlVersion: v1.0

baseCommand: /clamr/CLAMR-master/clamr_cpuonly
# The stdout field captures everything the command writes to stdout in this file.
# stderr can be specified the same way, but it is not currently implemented.
stdout: clamr_stdout.txt
# Arguments to the command
inputs:
amr_type:
# ? means the argument is optional
# All of the ? here are legacy from the original CWL
type: string?
# Declare extra options
# We support prefix and position
inputBinding:
# Prefix is the flag for cli command
prefix: -A
grid_res:
type: int?
inputBinding:
prefix: -n
max_levels:
type: int?
inputBinding:
prefix: -l
time_steps:
type: int?
inputBinding:
prefix: -t
output_steps:
type: int?
inputBinding:
prefix: -i
graphic_steps:
type: int?
inputBinding:
prefix: -g
graphics_type:
type: string?
inputBinding:
prefix: -G
rollback_images:
type: int?
inputBinding:
prefix: -b
checkpoint_disk_interval:
type: int?
inputBinding:
prefix: -c
checkpoint_mem_interval:
type: int?
inputBinding:
prefix: -C
hash_method:
type: string?
inputBinding:
prefix: -e

outputs:
# Captures stdout. Name is arbitrary.
clamr_stdout:
# type is syntactic sugar to just grab the output file defined above
# stdout:
# type: File
# outputBinding:
# glob: clamr_stdout.txt
# stdout is easy shorthand
type: stdout
outdir:
# Directory is just another type. The files are scanned for a directory with the name specified in glob.
# If the glob contains a wildcard, it is expanded.
type: Directory
outputBinding:
# Glob can be either a constant string or have a wildcard
# TODO verify CWLs glob support
glob: ./graphics_output/graph%05d.png
time_log:
type: File
outputBinding:
glob: total_execution_time.log
13 changes: 13 additions & 0 deletions beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr_job.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"grid_resolution": 32,
"max_levels": 3,
"time_steps": 5000,
"steps_between_outputs": 10,
"steps_between_graphics": 25,
"graphics_type": "png",
"input_format": "image2",
"frame_rate": 12,
"frame_size": "800x800",
"pixel_format": "yuv420p",
"output_filename": "CLAMR_movie.mp4"
}
19 changes: 19 additions & 0 deletions beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr_job.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Inputs for CLAMR
# /clamr/CLAMR-master/clamr_cpuonly -n 32 -l 3 -t 5000 -i 10 -g 25 -G png

grid_resolution: 32
max_levels: 3
time_steps: 5000
steps_between_outputs: 10
steps_between_graphics: 25
graphics_type: png

# Inputs for FFMPEG
#ffmpeg -f image2 -r 12 -s 800x800 -pix_fmt yuv420p CLAMR_movie.mp4

input_format: image2
frame_rate: 12
frame_size: 800x800
pixel_format: yuv420p
# output_filename: CLAMR_movie.mp4
output_filename: ./CLAMR_movie.mp4
82 changes: 82 additions & 0 deletions beeflow/data/cwl/bee_workflows/clamr-wf_script/clamr_wf.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# -*- mode: YAML; -*-

class: Workflow
cwlVersion: v1.0

# Main 3 components of workflow are inputs, outputs, and steps

inputs:
# All inputs go here for each step. No way to break them up.
# We should talk to the CWL people about that.
##### CLAMR inputs #####
# takes ID:Type syntax
grid_resolution: int
max_levels: int
time_steps: int
steps_between_outputs: int
steps_between_graphics: int
graphics_type: string
##### FFMPEG inputs #####
input_format: string
frame_rate: int
frame_size: string
pixel_format: string
output_filename: string

outputs:
# Outputs for all the steps
# Check where we copy these (probably the CWD).
# The files need to exist at the end of the workflow, but we aren't currently checking that.
# We have a function to get workflow_outputs.
# TODO: add a step to workflow_manager to confirm that each of these outputs exists
clamr_stdout:
type: File
outputSource: clamr/clamr_stdout
clamr_time_log:
type: File
outputSource: clamr/time_log
clamr_movie:
type: File
outputSource: ffmpeg/movie
ffmpeg_stderr:
type: File
outputSource: ffmpeg/ffmpeg_stderr

steps:
clamr:
run: clamr.cwl
in:
grid_res: grid_resolution
max_levels: max_levels
time_steps: time_steps
output_steps: steps_between_outputs
graphic_steps: steps_between_graphics
graphics_type: graphics_type
out: [clamr_stdout, outdir, time_log]
hints:
beeflow:ScriptRequirement:
enabled: true
pre_script_path: "pre_run.sh"
post_script_path: "post_run.sh"
DockerRequirement:
# TODO Sort this out
#dockerImport: clamr_img.tar.gz
#beeflow:copyContainer: clamr
beeflow:copyContainer: "/usr/projects/beedev/clamr/clamr-toss.tar.gz"

ffmpeg:
run: ffmpeg.cwl
in:
input_format: input_format
# input syntax is name: <step>/dependent_object
ffmpeg_input: clamr/outdir
frame_rate: frame_rate
frame_size: frame_size
pixel_format: pixel_format
# Setting output file with file_name
# output_filename set in wf inputs
output_file: output_filename
# Multiple outputs can be in array
out: [movie, ffmpeg_stderr]
requirements:
InlineJavascriptRequirement: {}
49 changes: 49 additions & 0 deletions beeflow/data/cwl/bee_workflows/clamr-wf_script/ffmpeg.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# -*- mode: YAML; -*-

class: CommandLineTool
cwlVersion: v1.0

baseCommand: ffmpeg -y

stderr: ffmpeg_stderr.txt

inputs:
input_format:
type: string?
inputBinding:
prefix: -f
position: 1
ffmpeg_input:
type: Directory
inputBinding:
prefix: -i
position: 2
valueFrom: $("/graph%05d.png")
frame_rate:
type: int?
inputBinding:
prefix: -r
position: 3
frame_size:
type: string?
inputBinding:
prefix: -s
position: 4
pixel_format:
type: string?
inputBinding:
prefix: -pix_fmt
position: 5
output_file:
type: string
inputBinding:
position: 6

outputs:
movie:
type: File
outputBinding:
glob: $(inputs.output_file)
# glob: CLAMR_movie.mp4
ffmpeg_stderr:
type: stderr
1 change: 1 addition & 0 deletions beeflow/data/cwl/bee_workflows/clamr-wf_script/post_run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
echo "After run"
1 change: 1 addition & 0 deletions beeflow/data/cwl/bee_workflows/clamr-wf_script/pre_run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
echo "Before run"

0 comments on commit 8c31b43

Please sign in to comment.