From 0bf307cd49efe3f2868d24a83f3700d1e16937e9 Mon Sep 17 00:00:00 2001 From: tefirman Date: Fri, 20 Dec 2024 09:28:17 -0800 Subject: [PATCH] Adding more map variable tests to address Sita's PR comments --- mapTypeTest/README.md | 96 ++++++++++++++++++++++------ mapTypeTest/inputs.json | 20 ++++-- mapTypeTest/mapTypeTest.wdl | 122 ++++++++++++++++++++++++++++++++++-- 3 files changed, 210 insertions(+), 28 deletions(-) diff --git a/mapTypeTest/README.md b/mapTypeTest/README.md index f01f944..d03d494 100644 --- a/mapTypeTest/README.md +++ b/mapTypeTest/README.md @@ -1,24 +1,50 @@ # Map Types WDL Workflow ## Overview -A workflow that demonstrates the usage of map types in WDL. The workflow processes sample data including sample types (normal/tumor) and read lengths, showing how to work with map data types and array scattering in WDL. +A comprehensive workflow that demonstrates advanced usage of map types in WDL. The workflow processes sample data including sample types (normal/tumor), read lengths, and nested patient information, showcasing various map operations and edge cases in WDL. ## Workflow Components -### Workflow: `map_example` -The main workflow that processes sample information using maps for metadata and read lengths. +### Workflow: `enhanced_map_test` +The main workflow that demonstrates various map operations and edge cases. 
**Inputs:** - `samples`: Array[String] containing sample names - `sample_metadata`: Map[String, String] mapping samples to their types (normal/tumor) - `read_lengths`: Map[String, Int] mapping samples to their read lengths +- `empty_map`: Map[String, String] for testing empty map handling +- `nested_map`: Map[String, Map[String, String]] containing nested patient and sample information +- `patient_ids`: Array[String] containing patient identifiers for nested map processing **Process:** -- Scatters over the sample array -- For each sample, looks up corresponding metadata and read length from maps -- Processes each sample with the retrieved information +- Tests empty map processing +- Handles nested map structures for patient/sample relationships +- Scatters over sample array for basic map operations +- Generates result maps from processing outputs -### Task: `process_sample` +### Tasks: + +#### `process_empty_map` +Tests handling of empty maps. + +**Inputs:** +- `empty_map`: Map[String, String] + +**Outputs:** +- `success`: Boolean indicating if empty map was processed correctly + +#### `process_nested_map` +Processes nested map structures containing patient and sample information. + +**Inputs:** +- `patient_id`: String +- `patient_data`: Map[String, String] +- `samples_for_patient`: Array[String] + +**Outputs:** +- `message`: String containing processed nested map information + +#### `process_sample` Processes individual sample information. **Inputs:** @@ -29,47 +55,77 @@ Processes individual sample information. **Outputs:** - `message`: String containing processed sample information +#### `create_result_map` +Aggregates processing results into a map structure. 
+ +**Inputs:** +- `sample_names`: Array[String] +- `processing_messages`: Array[String] + +**Outputs:** +- `output_map`: Map[String, String] containing sample processing results + **Runtime Requirements:** -- Docker: ubuntu:latest +All tasks use Docker containers: +- `process_sample`, `process_empty_map`, `process_nested_map`: ubuntu:latest +- `create_result_map`: python:3.8-slim ## Usage ```bash # Execute with cromwell -java -jar cromwell.jar run structAndMapTypes.wdl -i inputs.json +java -jar cromwell.jar run mapTypeTest.wdl -i inputs.json # Execute with miniwdl -miniwdl run structAndMapTypes.wdl +miniwdl run mapTypeTest.wdl -i inputs.json ``` Example inputs.json: ```json { - "map_example.samples": ["sample1", "sample2", "sample3"], - "map_example.sample_metadata": { + "enhanced_map_test.samples": ["sample1", "sample2", "sample3"], + "enhanced_map_test.sample_metadata": { "sample1": "normal", "sample2": "tumor", "sample3": "normal" }, - "map_example.read_lengths": { + "enhanced_map_test.read_lengths": { "sample1": 100, "sample2": 150, "sample3": 100 - } + }, + "enhanced_map_test.empty_map": {}, + "enhanced_map_test.nested_map": { + "patient1": { + "sample1": "normal", + "sample2": "tumor" + }, + "patient2": { + "sample3": "normal", + "sample4": "tumor" + } + }, + "enhanced_map_test.patient_ids": ["patient1", "patient2"] } ``` ## Purpose -This workflow serves as a test case for: -- Map type handling in WDL +This workflow serves as a comprehensive test case for: +- Basic map operations in WDL +- Empty map handling +- Nested map structures +- Map output generation - Array scattering with map lookups -- Basic sample data processing - Docker container usage +- Map to array conversion techniques ## Version WDL 1.0 ## Notes -- Demonstrates proper map type usage +- Demonstrates proper map type usage and common patterns - Shows how to access map values using array elements as keys -- Illustrates basic scatter operation over an array -- Tests input parsing for 
map types
+- Illustrates handling of empty maps
+- Examples of nested map processing
+- Demonstrates map output generation
+- Provides workarounds for WDL 1.0 limitations (e.g., no built-in keys() function)
+- Shows proper error handling for map operations
\ No newline at end of file
diff --git a/mapTypeTest/inputs.json b/mapTypeTest/inputs.json
index 496a293..eae3319 100644
--- a/mapTypeTest/inputs.json
+++ b/mapTypeTest/inputs.json
@@ -1,13 +1,25 @@
 {
-    "map_example.samples": ["sample1", "sample2", "sample3"],
-    "map_example.sample_metadata": {
+    "enhanced_map_test.samples": ["sample1", "sample2", "sample3"],
+    "enhanced_map_test.sample_metadata": {
         "sample1": "normal",
         "sample2": "tumor",
         "sample3": "normal"
     },
-    "map_example.read_lengths": {
+    "enhanced_map_test.read_lengths": {
         "sample1": 100,
         "sample2": 150,
         "sample3": 100
-    }
+    },
+    "enhanced_map_test.empty_map": {},
+    "enhanced_map_test.nested_map": {
+        "patient1": {
+            "sample1": "normal",
+            "sample2": "tumor"
+        },
+        "patient2": {
+            "sample3": "normal",
+            "sample4": "tumor"
+        }
+    },
+    "enhanced_map_test.patient_ids": ["patient1", "patient2"]
 }
\ No newline at end of file
diff --git a/mapTypeTest/mapTypeTest.wdl b/mapTypeTest/mapTypeTest.wdl
index 482222d..9b8c0bc 100644
--- a/mapTypeTest/mapTypeTest.wdl
+++ b/mapTypeTest/mapTypeTest.wdl
@@ -4,15 +4,47 @@ version 1.0
 
 #### WORKFLOW DEFINITION
 
-workflow map_example {
+workflow enhanced_map_test {
     input {
+        # Original inputs
         Array[String] samples
         Map[String, String] sample_metadata
         Map[String, Int] read_lengths
+
+        # New test inputs
+        Map[String, String] empty_map = {}
+        Map[String, Map[String, String]] nested_map = {
+            "patient1": {
+                "sample1": "normal",
+                "sample2": "tumor"
+            },
+            "patient2": {
+                "sample3": "normal",
+                "sample4": "tumor"
+            }
+        }
+        # We need to provide keys as arrays since WDL 1.0 doesn't have a keys() function
+        Array[String] patient_ids = ["patient1", "patient2"]
+    }
+
+    # Test processing empty map
+    call process_empty_map {
+        input:
+            empty_map = empty_map
+    }
+
+    # Test nested map processing
+    scatter (patient_id in patient_ids) {
+        call process_nested_map {
+            input:
+                patient_id = patient_id,
+                patient_data = nested_map[patient_id],
+                # We need to provide the sample names explicitly
+                samples_for_patient = if patient_id == "patient1" then ["sample1", "sample2"] else ["sample3", "sample4"]
+        }
     }
 
-    # To iterate over a map in WDL 1.0, we need to provide the keys as an array, super annoying
-    # WDL 1.1+ has a "keys" function that you can use to loop through the keys of the map
+    # Original sample processing with output map generation
     scatter (sample in samples) {
         call process_sample {
             input:
@@ -21,9 +53,65 @@ workflow map_example {
                 read_length = read_lengths[sample]
         }
     }
+
+    # Aggregate results into a map
+    call create_result_map {
+        input:
+            sample_names = samples,
+            processing_messages = process_sample.message
+    }
+
+    output {
+        Map[String, String] result_map = create_result_map.output_map
+        Array[String] nested_map_results = process_nested_map.message
+        Boolean empty_map_processed = process_empty_map.success
+    }
 }
 
-#### TASK DEFINITIONS
+task process_empty_map {
+    input {
+        Map[String, String] empty_map
+    }
+
+    command <<<
+        # write_map() yields the path of a TSV file, so inspect that file's contents
+        MAP_FILE="~{write_map(empty_map)}"
+        if ! grep -q . "$MAP_FILE"; then
+            echo "true"
+        else
+            echo "false"
+        fi
+    >>>
+
+    output {
+        Boolean success = read_boolean(stdout())
+    }
+
+    runtime {
+        docker: "ubuntu:latest"
+    }
+}
+
+task process_nested_map {
+    input {
+        String patient_id
+        Map[String, String] patient_data
+        Array[String] samples_for_patient
+    }
+
+    command <<<
+        echo "Processing patient ~{patient_id}"
+        for sample in ~{sep=' ' samples_for_patient}; do awk -F'\t' -v s="$sample" '$1 == s { print "Sample: " s " Type: " $2 }' "~{write_map(patient_data)}"; done  # one line per sample, type looked up in the serialized map
+    >>>
+
+    output {
+        String message = read_string(stdout())
+    }
+
+    runtime {
+        docker: "ubuntu:latest"
+    }
+}
 
 task process_sample {
     input {
@@ -44,3 +132,29 @@ task process_sample {
         docker: "ubuntu:latest"
     }
 }
+
+task create_result_map {
+    input {
+        Array[String] sample_names
+        Array[String] processing_messages
+    }
+
+    command <<<
+        python <<CODE
+        # read_map() expects one tab-separated key/value pair per line, so pair names with messages
+        with open("~{write_lines(sample_names)}") as names_file, open("~{write_lines(processing_messages)}") as messages_file:
+            pairs = zip(names_file.read().splitlines(), messages_file.read().splitlines())
+            rows = [f"{name}\t{message}\n" for name, message in pairs]
+        with open("output.txt", "w") as out:
+            out.writelines(rows)
+        CODE
+    >>>
+
+    output {
+        Map[String, String] output_map = read_map('output.txt')
+    }
+
+    runtime {
+        docker: "python:3.8-slim"
+    }
+}
\ No newline at end of file