From 0bf307cd49efe3f2868d24a83f3700d1e16937e9 Mon Sep 17 00:00:00 2001
From: tefirman <tefirman@gmail.com>
Date: Fri, 20 Dec 2024 09:28:17 -0800
Subject: [PATCH] Adding more map variable tests to address Sita's PR comments

---
 mapTypeTest/README.md       |  96 ++++++++++++++++++++++------
 mapTypeTest/inputs.json     |  20 ++++--
 mapTypeTest/mapTypeTest.wdl | 122 ++++++++++++++++++++++++++++++++++--
 3 files changed, 210 insertions(+), 28 deletions(-)

diff --git a/mapTypeTest/README.md b/mapTypeTest/README.md
index f01f944..d03d494 100644
--- a/mapTypeTest/README.md
+++ b/mapTypeTest/README.md
@@ -1,24 +1,50 @@
 # Map Types WDL Workflow
 
 ## Overview
-A workflow that demonstrates the usage of map types in WDL. The workflow processes sample data including sample types (normal/tumor) and read lengths, showing how to work with map data types and array scattering in WDL.
+A comprehensive workflow that demonstrates advanced usage of map types in WDL. The workflow processes sample data including sample types (normal/tumor), read lengths, and nested patient information, showcasing various map operations and edge cases in WDL.
 
 ## Workflow Components
 
-### Workflow: `map_example`
-The main workflow that processes sample information using maps for metadata and read lengths.
+### Workflow: `enhanced_map_test`
+The main workflow that demonstrates various map operations and edge cases.
 
 **Inputs:**
 - `samples`: Array[String] containing sample names
 - `sample_metadata`: Map[String, String] mapping samples to their types (normal/tumor)
 - `read_lengths`: Map[String, Int] mapping samples to their read lengths
+- `empty_map`: Map[String, String] for testing empty map handling
+- `nested_map`: Map[String, Map[String, String]] containing nested patient and sample information
+- `patient_ids`: Array[String] containing patient identifiers for nested map processing
 
 **Process:**
-- Scatters over the sample array
-- For each sample, looks up corresponding metadata and read length from maps
-- Processes each sample with the retrieved information
+- Tests empty map processing
+- Handles nested map structures for patient/sample relationships
+- Scatters over sample array for basic map operations
+- Generates result maps from processing outputs
 
-### Task: `process_sample`
+### Tasks:
+
+#### `process_empty_map`
+Tests handling of empty maps.
+
+**Inputs:**
+- `empty_map`: Map[String, String]
+
+**Outputs:**
+- `success`: Boolean indicating if empty map was processed correctly
+
+#### `process_nested_map`
+Processes nested map structures containing patient and sample information.
+
+**Inputs:**
+- `patient_id`: String
+- `patient_data`: Map[String, String]
+- `samples_for_patient`: Array[String]
+
+**Outputs:**
+- `message`: String containing processed nested map information
+
+#### `process_sample`
 Processes individual sample information.
 
 **Inputs:**
@@ -29,47 +55,77 @@ Processes individual sample information.
 **Outputs:**
 - `message`: String containing processed sample information
 
+#### `create_result_map`
+Aggregates processing results into a map structure.
+
+**Inputs:**
+- `sample_names`: Array[String]
+- `processing_messages`: Array[String]
+
+**Outputs:**
+- `output_map`: Map[String, String] containing sample processing results
+
 **Runtime Requirements:**
-- Docker: ubuntu:latest
+All tasks use Docker containers:
+- `process_sample`, `process_empty_map`, `process_nested_map`: ubuntu:latest
+- `create_result_map`: python:3.8-slim
 
 ## Usage
 ```bash
 # Execute with cromwell
-java -jar cromwell.jar run structAndMapTypes.wdl -i inputs.json
+java -jar cromwell.jar run mapTypeTest.wdl -i inputs.json
 
 # Execute with miniwdl
-miniwdl run structAndMapTypes.wdl
+miniwdl run mapTypeTest.wdl -i inputs.json
 ```
 
 Example inputs.json:
 ```json
 {
-    "map_example.samples": ["sample1", "sample2", "sample3"],
-    "map_example.sample_metadata": {
+    "enhanced_map_test.samples": ["sample1", "sample2", "sample3"],
+    "enhanced_map_test.sample_metadata": {
         "sample1": "normal",
         "sample2": "tumor",
         "sample3": "normal"
     },
-    "map_example.read_lengths": {
+    "enhanced_map_test.read_lengths": {
         "sample1": 100,
         "sample2": 150,
         "sample3": 100
-    }
+    },
+    "enhanced_map_test.empty_map": {},
+    "enhanced_map_test.nested_map": {
+        "patient1": {
+            "sample1": "normal",
+            "sample2": "tumor"
+        },
+        "patient2": {
+            "sample3": "normal",
+            "sample4": "tumor"
+        }
+    },
+    "enhanced_map_test.patient_ids": ["patient1", "patient2"]
 }
 ```
 
 ## Purpose
-This workflow serves as a test case for:
-- Map type handling in WDL
+This workflow serves as a comprehensive test case for:
+- Basic map operations in WDL
+- Empty map handling
+- Nested map structures
+- Map output generation
 - Array scattering with map lookups
-- Basic sample data processing
 - Docker container usage
+- Map to array conversion techniques
 
 ## Version
 WDL 1.0
 
 ## Notes
-- Demonstrates proper map type usage
+- Demonstrates proper map type usage and common patterns
 - Shows how to access map values using array elements as keys
-- Illustrates basic scatter operation over an array
-- Tests input parsing for map types
+- Illustrates handling of empty maps
+- Examples of nested map processing
+- Demonstrates map output generation
+- Provides workarounds for WDL 1.0 limitations (e.g., no built-in keys() function)
+- Shows proper error handling for map operations
\ No newline at end of file
diff --git a/mapTypeTest/inputs.json b/mapTypeTest/inputs.json
index 496a293..eae3319 100644
--- a/mapTypeTest/inputs.json
+++ b/mapTypeTest/inputs.json
@@ -1,13 +1,25 @@
 {
-    "map_example.samples": ["sample1", "sample2", "sample3"],
-    "map_example.sample_metadata": {
+    "enhanced_map_test.samples": ["sample1", "sample2", "sample3"],
+    "enhanced_map_test.sample_metadata": {
         "sample1": "normal",
         "sample2": "tumor",
         "sample3": "normal"
     },
-    "map_example.read_lengths": {
+    "enhanced_map_test.read_lengths": {
         "sample1": 100,
         "sample2": 150,
         "sample3": 100
-    }
+    },
+    "enhanced_map_test.empty_map": {},
+    "enhanced_map_test.nested_map": {
+        "patient1": {
+            "sample1": "normal",
+            "sample2": "tumor"
+        },
+        "patient2": {
+            "sample3": "normal",
+            "sample4": "tumor"
+        }
+    },
+    "enhanced_map_test.patient_ids": ["patient1", "patient2"]
 }
\ No newline at end of file
diff --git a/mapTypeTest/mapTypeTest.wdl b/mapTypeTest/mapTypeTest.wdl
index 482222d..9b8c0bc 100644
--- a/mapTypeTest/mapTypeTest.wdl
+++ b/mapTypeTest/mapTypeTest.wdl
@@ -4,15 +4,47 @@ version 1.0
 
 #### WORKFLOW DEFINITION
 
-workflow map_example {
+workflow enhanced_map_test {
     input {
+        # Original inputs
         Array[String] samples
         Map[String, String] sample_metadata
         Map[String, Int] read_lengths
+
+        # New test inputs
+        Map[String, String] empty_map = {}
+        Map[String, Map[String, String]] nested_map = {
+            "patient1": {
+                "sample1": "normal",
+                "sample2": "tumor"
+            },
+            "patient2": {
+                "sample3": "normal",
+                "sample4": "tumor"
+            }
+        }
+        # We need to provide keys as arrays since WDL 1.0 doesn't have a keys() function
+        Array[String] patient_ids = ["patient1", "patient2"]
+    }
+
+    # Test processing empty map
+    call process_empty_map {
+        input:
+            empty_map = empty_map
+    }
+
+    # Test nested map processing
+    scatter (patient_id in patient_ids) {
+        call process_nested_map {
+            input:
+                patient_id = patient_id,
+                patient_data = nested_map[patient_id],
+                # We need to provide the sample names explicitly
+                samples_for_patient = if patient_id == "patient1" then ["sample1", "sample2"] else ["sample3", "sample4"]
+        }
     }
 
-    # To iterate over a map in WDL 1.0, we need to provide the keys as an array, super annoying
-    # WDL 1.1+ has a "keys" function that you can use to loop through the keys of the map
+    # Original sample processing with output map generation
     scatter (sample in samples) {
         call process_sample {
             input:
@@ -21,9 +53,65 @@ workflow map_example {
                 read_length = read_lengths[sample]
         }
     }
+
+    # Aggregate results into a map
+    call create_result_map {
+        input:
+            sample_names = samples,
+            processing_messages = process_sample.message
+    }
+
+    output {
+        Map[String, String] result_map = create_result_map.output_map
+        Array[String] nested_map_results = process_nested_map.message
+        Boolean empty_map_processed = process_empty_map.success
+    }
 }
 
-#### TASK DEFINITIONS
+# Checks that an empty Map input reaches the task as an empty serialized map.
+task process_empty_map {
+    input {
+        Map[String, String] empty_map
+    }
+
+    command <<<
+        # write_map() yields the path of a TSV file (one key/value row per entry), not the
+        # map's text, so test the file's contents: no non-blank content means no entries.
+        MAP_FILE="~{write_map(empty_map)}"
+        if ! grep -q '[^[:space:]]' "$MAP_FILE"; then
+            echo "true"
+        else
+            echo "false"
+        fi
+    >>>
+    output {
+        Boolean success = read_boolean(stdout())  # "true" only when the map had no entries
+    }
+    runtime {
+        docker: "ubuntu:latest"
+    }
+}
+
+# Report the sample type recorded in `patient_data` for each of one patient's samples.
+task process_nested_map {
+    input {
+        String patient_id                    # key of the outer nested map
+        Map[String, String] patient_data     # sample name -> sample type for this patient
+        Array[String] samples_for_patient    # keys of patient_data to report (WDL 1.0 has no keys())
+    }
+    command <<<
+        echo "Processing patient ~{patient_id}"
+        # First file (map TSV) fills the lookup table; second file (sample list) drives the output.
+        awk -F'\t' 'FILENAME == ARGV[1] {type[$1] = $2; next} {print "Sample: " $1 " Type: " type[$1]}' \
+            "~{write_map(patient_data)}" "~{write_lines(samples_for_patient)}"
+    >>>
+    output {
+        String message = read_string(stdout())  # header line plus one "Sample: ... Type: ..." line per sample
+    }
+    runtime {
+        docker: "ubuntu:latest"
+    }
+}
 
 task process_sample {
     input {
@@ -44,3 +132,29 @@ task process_sample {
         docker: "ubuntu:latest"
     }
 }
+
+# Pair each sample name with its processing message and emit the pairs as a Map.
+task create_result_map {
+    input {
+        Array[String] sample_names          # map keys
+        Array[String] processing_messages   # map values, matched to sample_names by position
+    }
+    command <<<
+        python <<CODE
+        # Read arrays from write_lines() files, not spliced literals: commas/quotes in values stay intact.
+        with open('~{write_lines(sample_names)}') as f:
+            samples = f.read().splitlines()
+        with open('~{write_lines(processing_messages)}') as f:
+            messages = f.read().splitlines()
+        with open('output.txt', 'w') as f:
+            for sample, message in dict(zip(samples, messages)).items():
+                f.write(f"{sample}\t{message}\n")
+        CODE
+    >>>
+    output {
+        Map[String, String] output_map = read_map('output.txt')  # two-column TSV -> Map
+    }
+    runtime {
+        docker: "python:3.8-slim"
+    }
+}
\ No newline at end of file