From e4db1572c5bb8f87b6fc89280de0e99e2c6630f5 Mon Sep 17 00:00:00 2001
From: Shreya Shankar <ss.shankar505@gmail.com>
Date: Fri, 1 Nov 2024 18:00:53 -0700
Subject: [PATCH 1/4] feat: add optimizer in the UI

---
 README.md                                     |  14 +-
 docetl/builder.py                             |  34 +-
 docetl/console.py                             |  46 +-
 docetl/operations/base.py                     |   2 +
 .../map_optimizer/config_generators.py        |   5 +-
 docetl/optimizers/map_optimizer/evaluator.py  |  29 +-
 docetl/optimizers/map_optimizer/optimizer.py  |  12 +
 .../map_optimizer/plan_generators.py          |   3 -
 .../map_optimizer/prompt_generators.py        |  30 +-
 docetl/optimizers/reduce_optimizer.py         |  12 +-
 docetl/runner.py                              |  11 +-
 docetl/utils.py                               |  18 +-
 server/app/routes/pipeline.py                 |  71 +-
 website/package-lock.json                     |  27 +-
 website/package.json                          |   1 +
 website/src/app/api/utils.ts                  |   7 +
 website/src/app/localStorageKeys.ts           |   1 +
 website/src/app/types.ts                      |   3 +-
 website/src/components/AnsiRenderer.tsx       |  57 +-
 website/src/components/OperationCard.tsx      | 123 ++-
 website/src/components/Output.tsx             |  67 +-
 website/src/components/PipelineGui.tsx        | 701 ++++++++++--------
 website/src/components/operations/args.tsx    | 148 +++-
 .../src/components/operations/components.tsx  |  38 +-
 website/src/components/ui/progress.tsx        |  28 +
 website/src/contexts/PipelineContext.tsx      |  37 +
 26 files changed, 1047 insertions(+), 478 deletions(-)
 create mode 100644 website/src/components/ui/progress.tsx

diff --git a/README.md b/README.md
index ae3edd43..34dd3cbb 100644
--- a/README.md
+++ b/README.md
@@ -33,17 +33,25 @@ DocETL is the ideal choice when you're looking to maximize correctness and outpu
 
 ## Installation
 
-See the documentation for installing from PyPI.
+You can install DocETL either from PyPI or from source. We recommend installing from source for the latest features and bug fixes.
 
 ### Prerequisites
 
 Before installing DocETL, ensure you have Python 3.10 or later installed on your system. You can check your Python version by running:
 
+```bash
 python --version
+```
+
+### Install from PyPI
+
+```bash
+pip install docetl
+```
 
-### Installation Steps (from Source)
+### Install from Source
 
-1. Clone the DocETL repository:
+1. Clone the DocETL repository (or your fork):
 
 ```bash
 git clone https://github.com/ucbepic/docetl.git
diff --git a/docetl/builder.py b/docetl/builder.py
index 4d3c8d30..58eb3d00 100644
--- a/docetl/builder.py
+++ b/docetl/builder.py
@@ -83,7 +83,6 @@ class Optimizer:
     def __init__(
         self,
         runner: "DSLRunner",
-        max_threads: Optional[int] = None,
         model: str = "gpt-4o",
         resume: bool = False,
         timeout: int = 60,
@@ -980,6 +979,9 @@ def _get_sample_data(
                 return self._get_reduce_sample(
                     data, op_config.get("reduce_key"), sample_size
                 )
+            
+        if not self.config.get("optimizer_config", {}).get("random_sample", False):
+            return data[:sample_size]
 
         # Take the random 500 examples or all if less than 500
         initial_data = random.sample(data, min(500, len(data)))
@@ -1038,7 +1040,13 @@ def _get_reduce_sample(
             group_sample_size = int(sample_size * group_proportion)
 
             # Sample from the group
-            group_sample = random.sample(items, min(group_sample_size, len(items)))
+            if not self.config.get("optimizer_config", {}).get("random_sample", False):
+                group_sample = items[:group_sample_size]
+            else:
+                group_sample = random.sample(
+                    items, min(group_sample_size, len(items))
+                )
+
             sample.extend(group_sample)
 
         # If we haven't reached the desired sample size, add more items randomly
@@ -1051,22 +1059,10 @@ def _get_reduce_sample(
             ]
             additional_sample = random.sample(
                 remaining_items,
-                min(sample_size - len(sample), len(remaining_items)),
-            )
-            sample.extend(additional_sample)
-
-        # Add items randomly from non-top groups to meet the sample size
-        if len(sample) < sample_size:
-            remaining_items = [
-                item
-                for _, items in grouped_data.items()
-                for item in items
-                if item not in sample
-            ]
-            additional_sample = random.sample(
-                remaining_items,
-                min(sample_size - len(sample), len(remaining_items)),
-            )
+                min(
+                    sample_size - len(sample), len(remaining_items)
+                ),
+            ) if self.config.get("optimizer_config", {}).get("random_sample", False) else remaining_items[:sample_size - len(sample)]
             sample.extend(additional_sample)
 
         # Create a histogram of group sizes
@@ -1201,7 +1197,7 @@ def _optimize_equijoin(
             if map_operation["optimize"]:
                 dataset_to_transform_sample = random.sample(
                     dataset_to_transform, self.sample_size_map.get("map")
-                )
+                ) if self.config.get("optimizer_config", {}).get("random_sample", False) else dataset_to_transform[:self.sample_size_map.get("map")]
                 optimized_map_operations = self._optimize_map(
                     map_operation, dataset_to_transform_sample
                 )
diff --git a/docetl/console.py b/docetl/console.py
index 4a07f35d..da389f44 100644
--- a/docetl/console.py
+++ b/docetl/console.py
@@ -1,10 +1,11 @@
 import os
-from typing import Any, Optional
+import time
+from typing import Any, Optional, Tuple
 from rich.console import Console
 from io import StringIO
 import threading
 import queue
-
+from docetl.utils import StageType, get_stage_description
 
 class ThreadSafeConsole(Console):
     def __init__(self, *args, **kwargs):
@@ -13,6 +14,47 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.input_event = threading.Event()
         self.input_value = None
+        self.optimizer_statuses = []
+        self.optimizer_rationale = None
+
+    def status(
+        self,
+        status: "RenderableType",
+        *,
+        spinner: str = "dots",
+        spinner_style: "StyleType" = "status.spinner",
+        speed: float = 1.0,
+        refresh_per_second: float = 12.5,
+    ) -> "Status":
+        from rich.status import Status
+
+        status_renderable = Status(
+            status,
+            console=None,
+            spinner=spinner,
+            spinner_style=spinner_style,
+            speed=speed,
+            refresh_per_second=refresh_per_second,
+        )
+        return status_renderable
+    
+    def post_optimizer_rationale(self, should_optimize: bool, rationale: str, validator_prompt: str):
+        self.optimizer_rationale = (should_optimize, rationale, validator_prompt)
+
+    def post_optimizer_status(self, stage: StageType):
+        self.optimizer_statuses.append((stage, time.time()))
+
+    def get_optimizer_progress(self) -> Tuple[str, float]:
+        """Return (stage description, completed fraction in [0, 1]) for the latest optimizer stage."""
+        if not self.optimizer_statuses:
+            return ("Optimization starting...", 0)
+        stages = [stage for stage, _ in self.optimizer_statuses]
+        if stages[-1] == StageType.END:
+            return (get_stage_description(StageType.END), 1)
+        # The status list is only ever appended to here, so count just the stages posted since
+        # the latest SAMPLE_RUN and clamp; otherwise a later optimization run could report > 100%.
+        done = len(stages) - 1 - max((i for i, s in enumerate(stages) if s == StageType.SAMPLE_RUN), default=0)
+        return (get_stage_description(stages[-1]), min(done / (len(StageType) - 1), 1))
 
     def print(self, *args, **kwargs):
         super().print(*args, **kwargs)
diff --git a/docetl/operations/base.py b/docetl/operations/base.py
index 88377077..4f85b402 100644
--- a/docetl/operations/base.py
+++ b/docetl/operations/base.py
@@ -38,6 +38,7 @@ def __init__(
         max_threads: int,
         console: Optional[Console] = None,
         status: Optional[Status] = None,
+        is_build: bool = False,
         **kwargs,
     ):
         """
@@ -62,6 +63,7 @@ def __init__(
         self.num_retries_on_validate_failure = self.config.get(
             "num_retries_on_validate_failure", 0
         )
+        self.is_build = is_build
         self.syntax_check()
 
     # This must be overridden in a subclass
diff --git a/docetl/optimizers/map_optimizer/config_generators.py b/docetl/optimizers/map_optimizer/config_generators.py
index f9188fa0..5d7008e0 100644
--- a/docetl/optimizers/map_optimizer/config_generators.py
+++ b/docetl/optimizers/map_optimizer/config_generators.py
@@ -252,9 +252,8 @@ def _check_metadata_necessity(
         Determine if metadata is needed to perform the subtask.
 
         Consider:
-        1. Does the subtask require information that might be present in metadata?
-        2. Is the sample chunk or full input missing any crucial information that could be in metadata?
-        3. Would having metadata significantly improve the performance or accuracy of the subtask?
+        1. Does the input sample have any structural metadata that might be relevant to the subtask?
+        2. Is the sample chunk or full input missing any crucial information that could be in this metadata?
 
         Provide your response in the following format:
         """
diff --git a/docetl/optimizers/map_optimizer/evaluator.py b/docetl/optimizers/map_optimizer/evaluator.py
index fc963676..fb22825d 100644
--- a/docetl/optimizers/map_optimizer/evaluator.py
+++ b/docetl/optimizers/map_optimizer/evaluator.py
@@ -267,7 +267,7 @@ def _assess_operation(
         # Extract input variables from the prompt
         variables_in_prompt = extract_jinja_variables(op_config["prompt"])
         variables_in_prompt = [v.replace("input.", "") for v in variables_in_prompt]
-        input_sample = input_data[:2]
+        input_sample = input_data[:3]
         output_sample = [
             next(
                 (
@@ -291,7 +291,7 @@ def _assess_operation(
         )
         available_tokens = (
             model_input_context_length - prompt_tokens - 100
-        ) // 4  # 100 token buffer, divide by 4 for each sample
+        ) // 6  # 100 token buffer, divide by 6 for each sample
 
         # Prepare and truncate sample data
         input_1 = truncate_sample_data(
@@ -336,22 +336,43 @@ def _assess_operation(
         {json.dumps({"input": input_2, "output": output_2}, indent=2)}
         """
 
+        if len(input_sample) > 2:
+            input_3 = truncate_sample_data(
+                {key: input_sample[2].get(key, "N/A") for key in variables_in_prompt},
+                available_tokens,
+                [variables_in_prompt],
+                self.llm_client.model,
+            )
+            output_3 = truncate_sample_data(
+                {key: output_sample[2].get(key, "N/A") for key in output_schema.keys()},
+                available_tokens,
+                [list(output_schema.keys())],
+                self.llm_client.model,
+            )
+            prompt += f"""
+        ---Pair 3---
+        {json.dumps({"input": input_3, "output": output_3}, indent=2)}
+        """
+
         prompt += f"""
         Custom Validator Prompt:
         {validator_prompt}
 
-        Based on the above information, please assess the operation's performance. Provide your assessment in the following format:
+        Based on the above information, please assess the operation's performance. 
+        If it needs improvement, provide specific examples in your assessment.
+        Be very detailed in your reasons for improvements, if any.
+        Provide your assessment in the following format:
         """
 
         parameters = {
             "type": "object",
             "properties": {
-                "needs_improvement": {"type": "boolean"},
                 "reasons": {"type": "array", "items": {"type": "string"}},
                 "improvements": {
                     "type": "array",
                     "items": {"type": "string"},
                 },
+                "needs_improvement": {"type": "boolean"},
             },
             "required": ["needs_improvement", "reasons", "improvements"],
         }
diff --git a/docetl/optimizers/map_optimizer/optimizer.py b/docetl/optimizers/map_optimizer/optimizer.py
index 034f74ac..d27872e9 100644
--- a/docetl/optimizers/map_optimizer/optimizer.py
+++ b/docetl/optimizers/map_optimizer/optimizer.py
@@ -133,6 +133,7 @@ def optimize(
             The cost is the cost of the optimizer (from possibly synthesizing resolves).
 
         """
+        self.console.post_optimizer_status(StageType.SAMPLE_RUN)
         input_data = copy.deepcopy(input_data)
         # Add id to each input_data
         for i in range(len(input_data)):
@@ -184,7 +185,9 @@ def optimize(
             },
         )
 
+
         # Generate custom validator prompt
+        self.console.post_optimizer_status(StageType.SHOULD_OPTIMIZE)
         validator_prompt = self.prompt_generator._generate_validator_prompt(
             op_config, input_data, output_data
         )
@@ -218,6 +221,11 @@ def optimize(
                 "improvements": assessment.get("improvements", []),
             },
         )
+        self.console.post_optimizer_rationale(
+            assessment.get("needs_improvement", True),
+            "\n".join(assessment.get("reasons", [])),
+            validator_prompt
+        )
 
         # Check if improvement is needed based on the assessment
         if not data_exceeds_limit and not assessment.get("needs_improvement", True):
@@ -237,6 +245,7 @@ def optimize(
             candidate_plans["no_change"] = [op_config]
 
         # Generate chunk size plans
+        self.console.post_optimizer_status(StageType.CANDIDATE_PLANS)
         self.console.log("[bold magenta]Generating chunking plans...[/bold magenta]")
         chunk_size_plans = self.plan_generator._generate_chunk_size_plans(
             op_config, input_data, validator_prompt, model_input_context_length
@@ -290,6 +299,7 @@ def optimize(
             output=candidate_plans,
         )
 
+        self.console.post_optimizer_status(StageType.EVALUATION_RESULTS)
         self.console.log(
             f"[bold magenta]Evaluating {len(plans_list)} plans...[/bold magenta]"
         )
@@ -349,6 +359,7 @@ def optimize(
 
         # Check if there are no top plans
         if len(top_plans) == 0:
+            self.console.post_optimizer_status(StageType.END)
             raise ValueError(
                 "Agent did not generate any plans. Unable to proceed with optimization. Try again."
             )
@@ -422,6 +433,7 @@ def optimize(
             },
         )
 
+        self.console.post_optimizer_status(StageType.END)
         return (
             candidate_plans[best_plan_name],
             best_output,
diff --git a/docetl/optimizers/map_optimizer/plan_generators.py b/docetl/optimizers/map_optimizer/plan_generators.py
index 9f8a5f40..a3c1f9bd 100644
--- a/docetl/optimizers/map_optimizer/plan_generators.py
+++ b/docetl/optimizers/map_optimizer/plan_generators.py
@@ -781,9 +781,6 @@ def _generate_chain_plans(
         """
 
         output_schema = op_config["output"]["schema"]
-        if len(output_schema) <= 1:
-            return {}  # No need for chain decomposition if there's only one output key
-
         variables_in_prompt = extract_jinja_variables(op_config["prompt"])
         variables_in_prompt = [v.replace("input.", "") for v in variables_in_prompt]
 
diff --git a/docetl/optimizers/map_optimizer/prompt_generators.py b/docetl/optimizers/map_optimizer/prompt_generators.py
index f2bde628..91e18834 100644
--- a/docetl/optimizers/map_optimizer/prompt_generators.py
+++ b/docetl/optimizers/map_optimizer/prompt_generators.py
@@ -192,7 +192,7 @@ def _get_header_extraction_prompt(
 
         header_extraction_prompt = f"""Analyze the following chunk of a document and extract any headers you see.
 
-        {{ input.{split_key}_chunk }}
+        {{{{ input.{split_key}_chunk }}}}
 
         Examples of headers and their levels based on the document structure:
         {chr(10).join(header_examples)}
@@ -331,15 +331,41 @@ def _get_combine_prompt(
         {sample_inputs}
 
         Modify the original prompt to be a prompt that will combine these chunk results to accomplish the original task.
+        This prompt will be submitted to an LLM, so it must be a valid Jinja2 template, with natural language instructions.
 
         Guidelines for your prompt template:
         - The only variable you are allowed to use is the `inputs` variable, which contains all chunk results. Each value is a dictionary with the keys {', '.join(schema_keys)}
-        - Avoid using filters or complex logic, even though Jinja technically supports it
+        - Avoid using filters or complex logic like `do` statements, even though Jinja technically supports it
         - The prompt template must be a valid Jinja2 template
         - You must use the {{{{ inputs }}}} variable somehow, in a for loop. You must access specific keys in each item in the loop.
+        - The prompt template must also contain natural language instructions so the LLM knows what to do with the data
 
         Provide your prompt template as a single string.
         """
+        # Add example for combining themes (plain string, not an f-string, so Jinja braces are literal)
+        base_prompt += """
+        Example of a good combine prompt for combining themes:
+        ```
+        You are tasked with combining themes extracted from different chunks of text.
+
+        Here are the themes extracted from each chunk:
+        {% for item in inputs %}
+        Themes for chunk {{ loop.index }}:
+        {{ item.themes }}
+        {% endfor %}
+
+        Analyze all the themes above and create a consolidated list that:
+        1. Combines similar or related themes
+        2. Preserves unique themes that appear in only one chunk
+        3. Prioritizes themes that appear multiple times across chunks
+        4. Maintains the original wording where possible
+
+        Provide the final consolidated list of themes, ensuring each theme is distinct and meaningful.
+        ```
+
+        Now generate a combine prompt for the current task.
+        """
+
         parameters = {
             "type": "object",
             "properties": {"combine_prompt": {"type": "string"}},
diff --git a/docetl/optimizers/reduce_optimizer.py b/docetl/optimizers/reduce_optimizer.py
index feee3f8b..dcbc39fc 100644
--- a/docetl/optimizers/reduce_optimizer.py
+++ b/docetl/optimizers/reduce_optimizer.py
@@ -15,7 +15,7 @@
 from docetl.operations.utils import truncate_messages
 from docetl.optimizers.join_optimizer import JoinOptimizer
 from docetl.optimizers.utils import LLMClient
-from docetl.utils import count_tokens, extract_jinja_variables
+from docetl.utils import count_tokens, extract_jinja_variables, StageType
 
 
 class ReduceOptimizer:
@@ -149,9 +149,11 @@ def optimize(
         #     # Return unoptimized map and reduce operations
         #     return [map_prompt, op_config], input_data, 0.0
 
+        self.console.post_optimizer_status(StageType.SAMPLE_RUN)
         original_output = self._run_operation(op_config, input_data)
 
         # Step 1: Synthesize a validator prompt
+        self.console.post_optimizer_status(StageType.SHOULD_OPTIMIZE)
         validator_prompt = self._generate_validator_prompt(
             op_config, input_data, original_output
         )
@@ -172,6 +174,11 @@ def optimize(
         # Print the validation results
         self.console.log("[bold]Validation Results on Initial Sample:[/bold]")
         if validation_results["needs_improvement"]:
+            self.console.post_optimizer_rationale(
+                should_optimize=True,
+                rationale="\n".join(validation_results["issues"]),
+                validator_prompt=validator_prompt,
+            )
             self.console.log(
                 "\n".join(
                     [
@@ -302,6 +309,7 @@ def _optimize_single_reduce(
         is_associative = self._is_associative(op_config, input_data)
 
         # Step 3: Create and evaluate multiple reduce plans
+        self.console.post_optimizer_status(StageType.CANDIDATE_PLANS)
         self.console.log("[bold magenta]Generating batched plans...[/bold magenta]")
         reduce_plans = self._create_reduce_plans(op_config, input_data, is_associative)
 
@@ -310,12 +318,14 @@ def _optimize_single_reduce(
         gleaning_plans = self._generate_gleaning_plans(reduce_plans, validator_prompt)
 
         self.console.log("[bold magenta]Evaluating plans...[/bold magenta]")
+        self.console.post_optimizer_status(StageType.EVALUATION_RESULTS)
         best_plan = self._evaluate_reduce_plans(
             op_config, reduce_plans + gleaning_plans, input_data, validator_prompt
         )
 
         # Step 4: Run the best reduce plan
         optimized_output = self._run_operation(best_plan, input_data)
+        self.console.post_optimizer_status(StageType.END)
 
         return [best_plan], optimized_output, 0.0
 
diff --git a/docetl/runner.py b/docetl/runner.py
index 5acf84cf..18c73eb2 100644
--- a/docetl/runner.py
+++ b/docetl/runner.py
@@ -487,15 +487,14 @@ def optimize(
 
         builder = Optimizer(
             self,
-            max_threads=self.max_threads,
             **kwargs,
         )
         cost = builder.optimize()
-
-        # Dump via pickle
-        import pickle
-        with open(f"{self.base_name}_optimizer_output.pkl", "wb") as f:
-            pickle.dump(builder.captured_output, f)
+    
+        # Dump via json
+        # import json
+        # with open(f"{self.base_name}_optimizer_output.json", "wb") as f:
+        #     json.dump(builder.captured_output.optimizer_output, f)
 
 
         if save:
diff --git a/docetl/utils.py b/docetl/utils.py
index 18bcb454..d190ecb5 100644
--- a/docetl/utils.py
+++ b/docetl/utils.py
@@ -1,18 +1,32 @@
 import json
 import re
 from typing import Any, Dict, List
-
+from enum import Enum
 import tiktoken
 import yaml
 from jinja2 import Environment, meta
 from litellm import completion_cost as lcc
 
 
-class StageType:
+class StageType(Enum):
     SAMPLE_RUN = "sample_run"
     SHOULD_OPTIMIZE = "should_optimize"
     CANDIDATE_PLANS = "candidate_plans"
     EVALUATION_RESULTS = "evaluation_results"
+    END = "end"
+
+def get_stage_description(stage_type: StageType) -> str:
+    """Return the user-facing progress message for an optimizer stage; raise ValueError if unknown."""
+    for known_stage, description in (
+        (StageType.SAMPLE_RUN, "Running samples..."),
+        (StageType.SHOULD_OPTIMIZE, "Checking if optimization is needed..."),
+        (StageType.CANDIDATE_PLANS, "Generating candidate plans..."),
+        (StageType.EVALUATION_RESULTS, "Evaluating candidate plans..."),
+        (StageType.END, "Optimization complete!"),
+    ):
+        if stage_type == known_stage:
+            return description
+    raise ValueError(f"Unknown stage type: {stage_type}")
 
 class CapturedOutput:
     def __init__(self):
diff --git a/server/app/routes/pipeline.py b/server/app/routes/pipeline.py
index 666b43f0..38ec4e88 100644
--- a/server/app/routes/pipeline.py
+++ b/server/app/routes/pipeline.py
@@ -1,9 +1,16 @@
+import os
+import signal
 from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
 from server.app.models import PipelineRequest
 from docetl.runner import DSLRunner
 import asyncio
-import queue
+from rich.logging import RichHandler
+import logging
 
+FORMAT = "%(message)s"
+logging.basicConfig(
+    level="INFO", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()]
+)
 router = APIRouter()
 
 
@@ -29,9 +36,10 @@ async def websocket_run_pipeline(websocket: WebSocket):
             runner.clear_intermediate()
 
         if config.get("optimize", False):
-
+            logging.info(f"Optimizing pipeline with model {config.get('optimizer_model', 'gpt-4o')}")
+            
             async def run_pipeline():
-                return await asyncio.to_thread(runner.optimize, return_pipeline=False)
+                return await asyncio.to_thread(runner.optimize, return_pipeline=False, model=config.get("optimizer_model", "gpt-4o"))
 
         else:
 
@@ -44,11 +52,44 @@ async def run_pipeline():
             console_output = runner.console.file.getvalue()
             await websocket.send_json({"type": "output", "data": console_output})
 
+            if config.get("optimize", False):
+                optimizer_progress = runner.console.get_optimizer_progress()
+                rationale = runner.console.optimizer_rationale
+                await websocket.send_json({
+                    "type": "optimizer_progress", 
+                    "status": optimizer_progress[0], 
+                    "progress": optimizer_progress[1],
+                    "rationale": rationale[1] if rationale is not None else "",
+                    "should_optimize": rationale[0] if rationale is not None else False,
+                    "validator_prompt": rationale[2] if rationale is not None else ""
+                })
+
             # Check for incoming messages from the user
             try:
                 user_message = await asyncio.wait_for(
                     websocket.receive_json(), timeout=0.1
                 )
+
+                if user_message == "kill":
+                    runner.console.print("Killing process...")
+                    await websocket.send_json({
+                        "type": "error",
+                        "message": "Killing process. Service will restart automatically."
+                    })
+                    # Close websocket cleanly
+                    await websocket.close()
+                    
+                    # Get current process ID
+                    pid = os.getpid()
+                    
+                    # Schedule the process to kill itself
+                    async def delayed_kill():
+                        await asyncio.sleep(0.5)  # Give time for websocket to close
+                        os.kill(pid, signal.SIGTERM)
+                    
+                    asyncio.create_task(delayed_kill())
+                    return
+
                 # Process the user message and send it to the runner
                 runner.console.post_input(user_message)
             except asyncio.TimeoutError:
@@ -69,17 +110,15 @@ async def run_pipeline():
         # If optimize is true, send back the optimized operations
         if config.get("optimize", False):
             optimized_config, cost = result
-            # find the operation that has optimize = true
-            optimized_op = None
-            for op in optimized_config["operations"]:
-                if op.get("optimize", False):
-                    optimized_op = op
-                    break
-
-            if not optimized_op:
-                raise HTTPException(
-                    status_code=500, detail="No optimized operation found"
-                )
+
+            # Send the operations back in pipeline order, deduplicated on the resolved operation
+            new_pipeline_steps = optimized_config["pipeline"]["steps"]
+            new_pipeline_op_name_to_op_map = {op["name"]: op for op in optimized_config["operations"]}
+            new_ops_in_order = []
+            for new_step in new_pipeline_steps:
+                for op in new_step.get("operations", []):
+                    if new_pipeline_op_name_to_op_map[op] not in new_ops_in_order:
+                        new_ops_in_order.append(new_pipeline_op_name_to_op_map[op])
 
             await websocket.send_json(
                 {
@@ -87,7 +126,7 @@ async def run_pipeline():
                     "data": {
                         "message": "Pipeline executed successfully",
                         "cost": cost,
-                        "optimized_op": optimized_op,
+                        "optimized_ops": new_ops_in_order,
                     },
                 }
             )
@@ -108,4 +147,4 @@ async def run_pipeline():
 
         error_traceback = traceback.format_exc()
         print(f"Error occurred:\n{error_traceback}")
-        await websocket.send_json({"type": "error", "data": str(e)})
+        await websocket.send_json({"type": "error", "data": str(e) + "\n" + error_traceback})
diff --git a/website/package-lock.json b/website/package-lock.json
index 5420756c..58cc7248 100644
--- a/website/package-lock.json
+++ b/website/package-lock.json
@@ -22,7 +22,8 @@
         "@radix-ui/react-icons": "^1.3.0",
         "@radix-ui/react-label": "^2.1.0",
         "@radix-ui/react-menubar": "^1.1.2",
-        "@radix-ui/react-popover": "^1.1.2",
+        "@radix-ui/react-popover": "^1.0.7",
+        "@radix-ui/react-progress": "^1.1.0",
         "@radix-ui/react-scroll-area": "^1.1.0",
         "@radix-ui/react-select": "^2.1.1",
         "@radix-ui/react-slot": "^1.1.0",
@@ -3039,6 +3040,30 @@
         }
       }
     },
+    "node_modules/@radix-ui/react-progress": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-progress/-/react-progress-1.1.0.tgz",
+      "integrity": "sha512-aSzvnYpP725CROcxAOEBVZZSIQVQdHgBr2QQFKySsaD14u8dNT0batuXI+AAGDdAHfXH8rbnHmjYFqVJ21KkRg==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-context": "1.1.0",
+        "@radix-ui/react-primitive": "2.0.0"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@radix-ui/react-roving-focus": {
       "version": "1.1.0",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.0.tgz",
diff --git a/website/package.json b/website/package.json
index 91ab0de2..4ca7c52c 100644
--- a/website/package.json
+++ b/website/package.json
@@ -24,6 +24,7 @@
     "@radix-ui/react-label": "^2.1.0",
     "@radix-ui/react-menubar": "^1.1.2",
     "@radix-ui/react-popover": "^1.0.7",
+    "@radix-ui/react-progress": "^1.1.0",
     "@radix-ui/react-scroll-area": "^1.1.0",
     "@radix-ui/react-select": "^2.1.1",
     "@radix-ui/react-slot": "^1.1.0",
diff --git a/website/src/app/api/utils.ts b/website/src/app/api/utils.ts
index 03fa50ae..0671fa92 100644
--- a/website/src/app/api/utils.ts
+++ b/website/src/app/api/utils.ts
@@ -52,6 +52,13 @@ export function generatePipelineConfig(
       delete newOp.id;
       delete newOp.llmType;
 
+      if (
+        op.gleaning &&
+        (op.gleaning.num_rounds === 0 || !op.gleaning.validation_prompt)
+      ) {
+        delete newOp.gleaning;
+      }
+
       if (!op.output || !op.output.schema) return newOp;
 
       const processSchemaItem = (item: SchemaItem): string => {
diff --git a/website/src/app/localStorageKeys.ts b/website/src/app/localStorageKeys.ts
index 786b6c85..5e0ea8d3 100644
--- a/website/src/app/localStorageKeys.ts
+++ b/website/src/app/localStorageKeys.ts
@@ -13,3 +13,4 @@ export const SAMPLE_SIZE_KEY = "docetl_sampleSize";
 export const FILES_KEY = "docetl_files";
 export const COST_KEY = "docetl_cost";
 export const DEFAULT_MODEL_KEY = "docetl_defaultModel";
+export const OPTIMIZER_MODEL_KEY = "docetl_optimizerModel";
diff --git a/website/src/app/types.ts b/website/src/app/types.ts
index 1afab46b..de10eee8 100644
--- a/website/src/app/types.ts
+++ b/website/src/app/types.ts
@@ -20,6 +20,7 @@ export type Operation = {
   prompt?: string;
   output?: { schema: SchemaItem[] };
   validate?: string[];
+  gleaning?: { num_rounds: number; validation_prompt: string };
   otherKwargs?: Record<string, any>;
   runIndex?: number;
   sample?: number;
@@ -60,7 +61,7 @@ export interface BookmarkContextType {
     text: string,
     source: string,
     color: string,
-    notes: UserNote[],
+    notes: UserNote[]
   ) => void;
   removeBookmark: (id: string) => void;
 }
diff --git a/website/src/components/AnsiRenderer.tsx b/website/src/components/AnsiRenderer.tsx
index 5151760b..937d240f 100644
--- a/website/src/components/AnsiRenderer.tsx
+++ b/website/src/components/AnsiRenderer.tsx
@@ -33,20 +33,24 @@ const AnsiRenderer: React.FC<AnsiRendererProps> = ({
   }, [text]);
 
   const handleSendMessage = () => {
-    if (userInput.trim()) {
-      sendMessage(userInput);
-      setUserInput("");
-    }
+    sendMessage(userInput.trim());
+    setUserInput("");
   };
 
+  const isWebSocketClosed = readyState === WebSocket.CLOSED;
+
   return (
-    <div className="flex flex-col w-full h-[500px] bg-black text-white font-mono rounded-lg overflow-hidden">
+    <div
+      className={`flex flex-col w-full h-[620px] bg-black text-white font-mono rounded-lg overflow-hidden ${
+        isWebSocketClosed ? "opacity-50" : ""
+      }`}
+    >
       <div
         ref={scrollRef}
         className="flex-grow overflow-auto p-4"
         style={{
-          height: "400px", // Reduced height to accommodate input field
-          maxHeight: "400px",
+          height: "600px", // Reduced height to accommodate input field
+          maxHeight: "600px",
         }}
       >
         <pre
@@ -61,32 +65,49 @@ const AnsiRenderer: React.FC<AnsiRendererProps> = ({
             value={userInput}
             onChange={(e) => setUserInput(e.target.value)}
             onKeyPress={(e) => e.key === "Enter" && handleSendMessage()}
-            className="flex-grow bg-gray-800 text-white px-2 py-1 rounded-l"
-            placeholder="Type a message..."
+            className={`flex-grow bg-gray-800 text-white px-2 py-1 rounded-l ${
+              isWebSocketClosed ? "cursor-not-allowed" : ""
+            }`}
+            placeholder={
+              isWebSocketClosed
+                ? "WebSocket disconnected..."
+                : "Type a message..."
+            }
+            disabled={isWebSocketClosed}
           />
           <button
             onClick={handleSendMessage}
-            className="bg-blue-500 text-white px-4 py-1 rounded-r"
+            className={`bg-blue-500 text-white px-4 py-1 rounded-r ${
+              isWebSocketClosed ? "cursor-not-allowed opacity-50" : ""
+            }`}
+            disabled={isWebSocketClosed}
           >
             Send
           </button>
         </div>
         <div className="flex justify-between items-center">
-          <div className="text-xs text-gray-500">
+          <div
+            className={`text-xs ${
+              isWebSocketClosed ? "text-red-500" : "text-gray-500"
+            }`}
+          >
             WebSocket State:{" "}
             {readyState === WebSocket.CONNECTING
               ? "Connecting"
               : readyState === WebSocket.OPEN
-                ? "Open"
-                : readyState === WebSocket.CLOSING
-                  ? "Closing"
-                  : readyState === WebSocket.CLOSED
-                    ? "Closed"
-                    : "Unknown"}
+              ? "Open"
+              : readyState === WebSocket.CLOSING
+              ? "Closing"
+              : readyState === WebSocket.CLOSED
+              ? "Closed"
+              : "Unknown"}
           </div>
           <button
             onClick={() => setTerminalOutput("")}
-            className="text-xs text-gray-500"
+            className={`text-xs text-gray-500 ${
+              isWebSocketClosed ? "cursor-not-allowed opacity-50" : ""
+            }`}
+            disabled={isWebSocketClosed}
           >
             Clear Output
           </button>
diff --git a/website/src/components/OperationCard.tsx b/website/src/components/OperationCard.tsx
index 0c953730..39a7b14f 100644
--- a/website/src/components/OperationCard.tsx
+++ b/website/src/components/OperationCard.tsx
@@ -30,13 +30,14 @@ import {
   Settings,
   ListCollapse,
   Wand2,
+  ChevronDown,
 } from "lucide-react";
 import { Operation, SchemaItem } from "@/app/types";
 import { usePipelineContext } from "@/contexts/PipelineContext";
 import { useToast } from "@/hooks/use-toast";
 import { Skeleton } from "@/components/ui/skeleton";
 import { debounce } from "lodash";
-import { Guardrails } from "./operations/args";
+import { Guardrails, GleaningConfig } from "./operations/args";
 import createOperationComponent from "./operations/components";
 import { useWebSocket } from "@/contexts/WebSocketContext";
 import { Badge } from "./ui/badge";
@@ -54,6 +55,7 @@ const OperationHeader: React.FC<{
   llmType: string;
   disabled: boolean;
   currOp: boolean;
+  expanded: boolean;
   onEdit: (name: string) => void;
   onDelete: () => void;
   onRunOperation: () => void;
@@ -61,6 +63,7 @@ const OperationHeader: React.FC<{
   onShowOutput: () => void;
   onOptimize: () => void;
   onAIEdit: (instruction: string) => void;
+  onToggleExpand: () => void;
 }> = React.memo(
   ({
     name,
@@ -68,6 +71,7 @@ const OperationHeader: React.FC<{
     llmType,
     disabled,
     currOp,
+    expanded,
     onEdit,
     onDelete,
     onRunOperation,
@@ -75,6 +79,7 @@ const OperationHeader: React.FC<{
     onShowOutput,
     onOptimize,
     onAIEdit,
+    onToggleExpand,
   }) => {
     const [isEditing, setIsEditing] = useState(false);
     const [editedName, setEditedName] = useState(name);
@@ -93,24 +98,38 @@ const OperationHeader: React.FC<{
       <div className="relative flex items-center justify-between py-3 px-4">
         {/* Left side buttons */}
         <div className="flex space-x-1 absolute left-1">
-          <Badge variant={currOp ? "default" : "secondary"}>{type}</Badge>
           <Button
             variant="ghost"
             size="sm"
             className="p-0.25 h-6 w-6"
-            onClick={onToggleSettings}
+            onClick={onToggleExpand}
           >
-            <Settings size={14} className="text-gray-500" />
+            <ChevronDown
+              size={14}
+              className={`text-gray-500 transform transition-transform ${
+                expanded ? "rotate-180" : ""
+              }`}
+            />
           </Button>
+          <Badge variant={currOp ? "default" : "secondary"}>{type}</Badge>
           <Button
             variant="ghost"
             size="sm"
             className="p-0.25 h-6 w-6"
-            disabled={type !== "resolve"}
-            onClick={onOptimize}
+            onClick={onToggleSettings}
           >
-            <Zap size={14} className="text-yellow-500" />
+            <Settings size={14} className="text-gray-500" />
           </Button>
+          {["resolve", "map", "reduce", "filter"].includes(type) && (
+            <Button
+              variant="ghost"
+              size="sm"
+              className="p-0.25 h-6 w-6"
+              onClick={onOptimize}
+            >
+              <Zap size={14} className="text-yellow-500" />
+            </Button>
+          )}
           <TooltipProvider>
             <Tooltip>
               <TooltipTrigger asChild>
@@ -331,7 +350,13 @@ type Action =
   | { type: "TOGGLE_GUARDRAILS" }
   | { type: "TOGGLE_SETTINGS" }
   | { type: "SET_RUN_INDEX"; payload: number }
-  | { type: "UPDATE_SETTINGS"; payload: Record<string, string> };
+  | { type: "UPDATE_SETTINGS"; payload: Record<string, string> }
+  | { type: "TOGGLE_EXPAND" }
+  | {
+      type: "UPDATE_GLEANINGS";
+      payload: { num_rounds: number; validation_prompt: string };
+    }
+  | { type: "TOGGLE_GLEANINGS" };
 
 // State type
 type State = {
@@ -340,6 +365,8 @@ type State = {
   isSchemaExpanded: boolean;
   isGuardrailsExpanded: boolean;
   isSettingsOpen: boolean;
+  isExpanded: boolean;
+  isGleaningsExpanded: boolean;
 };
 
 // Reducer function
@@ -401,6 +428,17 @@ function operationReducer(state: State, action: Action): State {
             operation: { ...state.operation, runIndex: action.payload },
           }
         : state;
+    case "TOGGLE_EXPAND":
+      return { ...state, isExpanded: !state.isExpanded };
+    case "UPDATE_GLEANINGS":
+      return state.operation
+        ? {
+            ...state,
+            operation: { ...state.operation, gleaning: action.payload },
+          }
+        : state;
+    case "TOGGLE_GLEANINGS":
+      return { ...state, isGleaningsExpanded: !state.isGleaningsExpanded };
     default:
       return state;
   }
@@ -413,6 +451,8 @@ const initialState: State = {
   isSchemaExpanded: false,
   isGuardrailsExpanded: false,
   isSettingsOpen: false,
+  isExpanded: true,
+  isGleaningsExpanded: false,
 };
 
 // Main component
@@ -424,6 +464,8 @@ export const OperationCard: React.FC<{ index: number }> = ({ index }) => {
     isSchemaExpanded,
     isGuardrailsExpanded,
     isSettingsOpen,
+    isExpanded,
+    isGleaningsExpanded,
   } = state;
 
   const {
@@ -440,6 +482,7 @@ export const OperationCard: React.FC<{ index: number }> = ({ index }) => {
     sampleSize,
     setCost,
     defaultModel,
+    optimizerModel,
     setTerminalOutput,
   } = usePipelineContext();
   const { toast } = useToast();
@@ -611,6 +654,7 @@ export const OperationCard: React.FC<{ index: number }> = ({ index }) => {
       sendMessage({
         yaml_config: filePath,
         optimize: true,
+        optimizer_model: optimizerModel,
       });
     } catch (error) {
       console.error("Error optimizing operation:", error);
@@ -754,6 +798,7 @@ export const OperationCard: React.FC<{ index: number }> = ({ index }) => {
                 llmType={operation.llmType}
                 disabled={isLoadingOutputs || pipelineOutput === undefined}
                 currOp={operation.id === pipelineOutput?.operationId}
+                expanded={isExpanded}
                 onEdit={(name) => {
                   dispatch({ type: "UPDATE_NAME", payload: name });
                   debouncedUpdate();
@@ -768,27 +813,49 @@ export const OperationCard: React.FC<{ index: number }> = ({ index }) => {
                 onShowOutput={onShowOutput}
                 onOptimize={onOptimize}
                 onAIEdit={handleAIEdit}
+                onToggleExpand={() => dispatch({ type: "TOGGLE_EXPAND" })}
               />
-              <CardContent className="py-2 px-3">
-                {createOperationComponent(
-                  operation,
-                  handleOperationUpdate,
-                  isSchemaExpanded,
-                  () => dispatch({ type: "TOGGLE_SCHEMA" })
-                )}
-              </CardContent>
-              {operation.llmType === "LLM" && (
-                <Guardrails
-                  guardrails={operation.validate || []}
-                  onUpdate={(newGuardrails) =>
-                    dispatch({
-                      type: "UPDATE_GUARDRAILS",
-                      payload: newGuardrails,
-                    })
-                  }
-                  isExpanded={isGuardrailsExpanded}
-                  onToggle={() => dispatch({ type: "TOGGLE_GUARDRAILS" })}
-                />
+              {isExpanded && (
+                <>
+                  <CardContent className="py-2 px-3">
+                    {createOperationComponent(
+                      operation,
+                      handleOperationUpdate,
+                      isSchemaExpanded,
+                      () => dispatch({ type: "TOGGLE_SCHEMA" })
+                    )}
+                  </CardContent>
+                  {operation.llmType === "LLM" && (
+                    <>
+                      <Guardrails
+                        guardrails={operation.validate || []}
+                        onUpdate={(newGuardrails) =>
+                          dispatch({
+                            type: "UPDATE_GUARDRAILS",
+                            payload: newGuardrails,
+                          })
+                        }
+                        isExpanded={isGuardrailsExpanded}
+                        onToggle={() => dispatch({ type: "TOGGLE_GUARDRAILS" })}
+                      />
+                    </>
+                  )}
+                  {(operation.type === "map" ||
+                    operation.type === "reduce" ||
+                    operation.type === "filter") && (
+                    <GleaningConfig
+                      gleaning={operation.gleaning || null}
+                      onUpdate={(newGleanings) =>
+                        dispatch({
+                          type: "UPDATE_GLEANINGS",
+                          payload: newGleanings,
+                        })
+                      }
+                      isExpanded={isGleaningsExpanded}
+                      onToggle={() => dispatch({ type: "TOGGLE_GLEANINGS" })}
+                    />
+                  )}
+                </>
               )}
               <SettingsModal
                 opName={operation.name}
diff --git a/website/src/components/Output.tsx b/website/src/components/Output.tsx
index a625fb88..91b55b3c 100644
--- a/website/src/components/Output.tsx
+++ b/website/src/components/Output.tsx
@@ -1,13 +1,10 @@
-import React, {
-  useState,
-  useEffect,
-  useMemo,
-} from "react";
+import React, { useState, useEffect, useMemo } from "react";
 import { ColumnType } from "@/components/ResizableDataTable";
 import ResizableDataTable from "@/components/ResizableDataTable";
 import { usePipelineContext } from "@/contexts/PipelineContext";
 import { Loader2, Download } from "lucide-react";
 import { Button } from "@/components/ui/button";
+import { Progress } from "@/components/ui/progress";
 import BookmarkableText from "@/components/BookmarkableText";
 import { Operation, OutputRow } from "@/app/types";
 import { Parser } from "json2csv";
@@ -22,11 +19,51 @@ import { useWebSocket } from "@/contexts/WebSocketContext";
 import AnsiRenderer from "./AnsiRenderer";
 
 export const ConsoleContent: React.FC = () => {
-  const { terminalOutput, setTerminalOutput } = usePipelineContext();
+  const { terminalOutput, setTerminalOutput, optimizerProgress } =
+    usePipelineContext();
   const { readyState } = useWebSocket();
 
   return (
     <div className="flex flex-col h-full w-full bg-black text-white font-mono rounded-lg overflow-hidden">
+      {optimizerProgress && (
+        <div className="p-4 border-b border-gray-800 bg-gray-900">
+          <div className="flex items-center justify-between mb-2">
+            <div className="text-sm font-medium text-blue-400">
+              {optimizerProgress.status}
+            </div>
+            <div className="text-xs text-gray-400">
+              {Math.round(optimizerProgress.progress * 100)}%
+            </div>
+          </div>
+          <Progress
+            value={optimizerProgress.progress * 100}
+            className="w-full h-2 bg-gray-700"
+          />
+          {optimizerProgress.shouldOptimize && (
+            <div className="mt-4 space-y-4">
+              <div>
+                <div className="text-xs uppercase tracking-wider text-gray-400 mb-1">
+                  Optimizing because
+                </div>
+                <div className="text-sm text-gray-200 leading-relaxed">
+                  {optimizerProgress.rationale}
+                </div>
+              </div>
+
+              {optimizerProgress.validatorPrompt && (
+                <div>
+                  <div className="text-xs uppercase tracking-wider text-gray-400 mb-1">
+                    Using this prompt to find the best plan
+                  </div>
+                  <div className="text-sm text-gray-200 leading-relaxed whitespace-pre-wrap border-l-4 border-gray-600 pl-4 my-2 italic">
+                    {optimizerProgress.validatorPrompt}
+                  </div>
+                </div>
+              )}
+            </div>
+          )}
+        </div>
+      )}
       <AnsiRenderer
         text={terminalOutput || ""}
         readyState={readyState}
@@ -59,12 +96,12 @@ export const Output: React.FC = () => {
 
   useEffect(() => {
     const foundOperation = operations.find(
-      (op: Operation) => op.id === output?.operationId,
+      (op: Operation) => op.id === output?.operationId
     );
     setOperation(foundOperation);
     setOpName(foundOperation?.name);
     setIsResolveOrReduce(
-      foundOperation?.type === "resolve" || foundOperation?.type === "reduce",
+      foundOperation?.type === "resolve" || foundOperation?.type === "reduce"
     );
   }, [operations, output]);
 
@@ -76,7 +113,7 @@ export const Output: React.FC = () => {
         try {
           // Fetch output data
           const outputResponse = await fetch(
-            `/api/readFile?path=${output.path}`,
+            `/api/readFile?path=${output.path}`
           );
           if (!outputResponse.ok) {
             throw new Error("Failed to fetch output file");
@@ -121,7 +158,7 @@ export const Output: React.FC = () => {
           // Fetch input data if inputPath exists
           if (output.inputPath) {
             const inputResponse = await fetch(
-              `/api/readFile?path=${output.inputPath}`,
+              `/api/readFile?path=${output.inputPath}`
             );
             if (!inputResponse.ok) {
               throw new Error("Failed to fetch input file");
@@ -129,7 +166,7 @@ export const Output: React.FC = () => {
             const inputContent = await inputResponse.text();
             const parsedInputs = JSON.parse(inputContent);
             setInputCount(
-              Array.isArray(parsedInputs) ? parsedInputs.length : 1,
+              Array.isArray(parsedInputs) ? parsedInputs.length : 1
             );
           } else {
             setInputCount(0);
@@ -205,8 +242,8 @@ export const Output: React.FC = () => {
       return outputs.length > 0 && reduceColumnName in outputs[0]
         ? { name: reduceColumnName, type: "reduce" }
         : outputs.length > 0 && resolveColumnName in outputs[0]
-          ? { name: resolveColumnName, type: "resolve" }
-          : null;
+        ? { name: resolveColumnName, type: "resolve" }
+        : null;
     }, [outputs, opName, operation]);
 
     if (!visualizationColumn || !operation) {
@@ -225,7 +262,7 @@ export const Output: React.FC = () => {
             .sort(
               (a, b) =>
                 Number(b[visualizationColumn.name]) -
-                Number(a[visualizationColumn.name]),
+                Number(a[visualizationColumn.name])
             )
             .map((row, index) => (
               <div key={index} className="mb-2">
@@ -252,7 +289,7 @@ export const Output: React.FC = () => {
           outputs.flatMap((row) => {
             const kvPairs = row[visualizationColumn.name];
             return Object.keys(kvPairs).filter((key) => key in row);
-          }),
+          })
         );
 
         const groupedByIntersection: { [key: string]: any[] } = {};
diff --git a/website/src/components/PipelineGui.tsx b/website/src/components/PipelineGui.tsx
index 4e590fe3..b8332cf9 100644
--- a/website/src/components/PipelineGui.tsx
+++ b/website/src/components/PipelineGui.tsx
@@ -23,6 +23,8 @@ import {
   Download,
   FileUp,
   Save,
+  Loader2,
+  StopCircle,
 } from "lucide-react";
 import { usePipelineContext } from "@/contexts/PipelineContext";
 import {
@@ -78,9 +80,14 @@ const PipelineGUI: React.FC = () => {
     setTerminalOutput,
     saveProgress,
     clearPipelineState,
+    optimizerModel,
+    setOptimizerModel,
+    optimizerProgress,
+    setOptimizerProgress,
   } = usePipelineContext();
   const [isSettingsOpen, setIsSettingsOpen] = useState(false);
   const [tempPipelineName, setTempPipelineName] = useState(pipelineName);
+  const [tempOptimizerModel, setTempOptimizerModel] = useState(optimizerModel); // draft shown in settings; seeded from the optimizer model, not the default model
   const [tempSampleSize, setTempSampleSize] = useState(
     sampleSize?.toString() || ""
   );
@@ -91,55 +98,73 @@ const PipelineGUI: React.FC = () => {
   const { toast } = useToast();
   const { connect, sendMessage, lastMessage, readyState, disconnect } =
     useWebSocket();
+  const [runningButtonType, setRunningButtonType] = useState<
+    "run" | "clear-run" | null
+  >(null);
 
   useEffect(() => {
     if (lastMessage) {
       if (lastMessage.type === "output") {
         setTerminalOutput(lastMessage.data);
+      } else if (lastMessage.type === "optimizer_progress") {
+        setOptimizerProgress({
+          status: lastMessage.status,
+          progress: lastMessage.progress,
+          shouldOptimize: lastMessage.should_optimize,
+          rationale: lastMessage.rationale,
+          validatorPrompt: lastMessage.validator_prompt,
+        });
       } else if (lastMessage.type === "result") {
         const runCost = lastMessage.data.cost || 0;
+        setOptimizerProgress(null);
 
         // See if there was an optimized operation
-        const optimizedOp = lastMessage.data.optimized_op;
-        if (optimizedOp) {
-          const {
-            id,
-            llmType,
-            type,
-            name,
-            prompt,
-            output,
-            validate,
-            sample,
-            ...otherKwargs
-          } = optimizedOp;
-          const convertedOp = {
-            id: id || crypto.randomUUID(),
-            llmType:
-              type === "map" ||
-              type === "reduce" ||
-              type === "resolve" ||
-              type === "filter" ||
-              type === "parallel_map"
-                ? "LLM"
-                : "non-LLM",
-            type: type,
-            name: name || "Untitled Operation",
-            prompt: prompt,
-            output: output
-              ? {
-                  schema: schemaDictToItemSet(output.schema),
-                }
-              : undefined,
-            validate: validate,
-            sample: sample,
-            otherKwargs: otherKwargs || {},
-          };
-          setOperations((prev) =>
-            prev.map((op) =>
-              op.name === optimizedOp.name ? (convertedOp as Operation) : op
-            )
-          );
+        const optimizedOps = lastMessage.data.optimized_ops;
+        if (optimizedOps) {
+          const newOperations = optimizedOps.map((optimizedOp) => {
+            const {
+              id,
+              llmType,
+              type,
+              name,
+              prompt,
+              output,
+              validate,
+              gleaning,
+              sample,
+              ...otherKwargs
+            } = optimizedOp;
+
+            // Find matching operation in previous operations list
+            const existingOp = operations.find((op) => op.name === name);
+
+            return {
+              id: id || crypto.randomUUID(),
+              llmType:
+                type === "map" ||
+                type === "reduce" ||
+                type === "resolve" ||
+                type === "filter" ||
+                type === "parallel_map"
+                  ? "LLM"
+                  : "non-LLM",
+              type: type,
+              name: name || "Untitled Operation",
+              prompt: prompt,
+              output: output
+                ? {
+                    schema: schemaDictToItemSet(output.schema),
+                  }
+                : undefined,
+              validate: validate,
+              gleaning: gleaning,
+              sample: sample,
+              otherKwargs: otherKwargs || {},
+              ...(existingOp?.runIndex && { runIndex: existingOp.runIndex }),
+            } as Operation;
+          });
+
+          setOperations(newOperations);
         }
 
         setCost((prevCost) => prevCost + runCost);
@@ -186,6 +211,12 @@ const PipelineGUI: React.FC = () => {
     }
   }, [currentFile]);
 
+  useEffect(() => { // keep the optimizer-model draft in sync with the context value
+    if (optimizerModel) {
+      setTempOptimizerModel(optimizerModel);
+    }
+  }, [optimizerModel]);
+
   const handleFileUpload = async (
     event: React.ChangeEvent<HTMLInputElement>
   ) => {
@@ -353,7 +384,9 @@ const PipelineGUI: React.FC = () => {
       if (lastOpIndex < 0) return;
 
       const lastOperation = operations[lastOpIndex];
+      setOptimizerProgress(null);
       setIsLoadingOutputs(true);
+      setRunningButtonType(clear_intermediate ? "clear-run" : "run");
       setNumOpRun((prevNum) => {
         const newNum = prevNum + operations.length;
         const updatedOperations = operations.map((op, index) => ({
@@ -412,6 +445,7 @@ const PipelineGUI: React.FC = () => {
         // Close the WebSocket connection
         disconnect();
         setIsLoadingOutputs(false);
+        setRunningButtonType(null);
       }
     },
     [
@@ -453,6 +487,7 @@ const PipelineGUI: React.FC = () => {
     setCurrentFile(tempCurrentFile);
     setDefaultModel(tempDefaultModel);
     setIsSettingsOpen(false);
+    setOptimizerModel(tempOptimizerModel);
   };
 
   const handleDragEnd = (result: DropResult) => {
@@ -473,297 +508,319 @@ const PipelineGUI: React.FC = () => {
     }
   };
 
+  const handleStop = () => { // sends a stop request over the websocket and resets the running-button state
+    sendMessage("kill"); // presumably the server treats "kill" as an abort request — TODO confirm against the websocket route
+    setRunningButtonType(null); // reset the "run" | "clear-run" marker to null
+  };
+
   return (
-    <div className="relative">
-      <div className="h-full overflow-auto">
-        <div className="sticky top-0 z-10 p-2 bg-white">
-          <div className="flex justify-between items-center">
-            <div className="flex items-center space-x-2">
-              <h2 className="text-sm font-bold uppercase">
-                {pipelineName.toUpperCase()}
-              </h2>
-              {sampleSize && (
-                <TooltipProvider delayDuration={0}>
-                  <Tooltip>
-                    <TooltipTrigger>
-                      <div className="flex items-center">
-                        <PieChart size={16} className="text-primary mr-2" />
-                        <span className="text-xs text-primary">
-                          {sampleSize} samples
-                        </span>
-                      </div>
-                    </TooltipTrigger>
-                    <TooltipContent className="max-w-[200px]">
-                      <p>
-                        Pipeline will run on a sample of {sampleSize} random
-                        documents.
-                      </p>
-                    </TooltipContent>
-                  </Tooltip>
-                </TooltipProvider>
-              )}
-              <div className="flex p-0 space-x-0">
-                <TooltipProvider>
-                  <Tooltip>
-                    <TooltipTrigger asChild>
-                      <Button
-                        variant="ghost"
-                        size="icon"
-                        onClick={() => fileInputRef.current?.click()}
-                      >
-                        <FileUp size={16} />
-                      </Button>
-                    </TooltipTrigger>
-                    <TooltipContent>
-                      <p>Initialize from config file</p>
-                    </TooltipContent>
-                  </Tooltip>
-                </TooltipProvider>
-                <Input
-                  type="file"
-                  ref={fileInputRef}
-                  onChange={handleFileUpload}
-                  accept=".yaml,.yml"
-                  className="hidden"
-                />
-                <TooltipProvider>
-                  <Tooltip>
-                    <TooltipTrigger asChild>
-                      <Button
-                        size="icon"
-                        variant="ghost"
-                        onClick={() => handleExport()}
-                      >
-                        <Download size={16} />
-                      </Button>
-                    </TooltipTrigger>
-                    <TooltipContent>
-                      <p>Download pipeline config file</p>
-                    </TooltipContent>
-                  </Tooltip>
-                </TooltipProvider>
-
-                <Button
-                  size="icon"
-                  variant="ghost"
-                  onClick={() => setIsSettingsOpen(true)}
-                >
-                  <Settings size={16} />
-                </Button>
-              </div>
-            </div>
-            <div className="flex space-x-2">
-              <DropdownMenu>
-                <DropdownMenuTrigger asChild>
-                  <Button size="sm" className="rounded-sm">
-                    <Plus size={16} className="mr-2" /> Add Operation{" "}
-                    <ChevronDown size={16} className="ml-2" />
-                  </Button>
-                </DropdownMenuTrigger>
-                <DropdownMenuContent>
-                  <DropdownMenuLabel>LLM Operations</DropdownMenuLabel>
-                  <DropdownMenuItem
-                    onClick={() =>
-                      handleAddOperation("LLM", "map", "Untitled Map")
-                    }
-                  >
-                    Map
-                  </DropdownMenuItem>
-                  <DropdownMenuItem
-                    onClick={() =>
-                      handleAddOperation("LLM", "reduce", "Untitled Reduce")
-                    }
-                  >
-                    Reduce
-                  </DropdownMenuItem>
-                  <DropdownMenuItem
-                    onClick={() =>
-                      handleAddOperation("LLM", "resolve", "Untitled Resolve")
-                    }
-                  >
-                    Resolve
-                  </DropdownMenuItem>
-                  <DropdownMenuItem
-                    onClick={() =>
-                      handleAddOperation("LLM", "filter", "Untitled Filter")
-                    }
-                  >
-                    Filter
-                  </DropdownMenuItem>
-                  <DropdownMenuItem
-                    onClick={() =>
-                      handleAddOperation(
-                        "LLM",
-                        "parallel_map",
-                        "Untitled Parallel Map"
-                      )
-                    }
-                  >
-                    Parallel Map
-                  </DropdownMenuItem>
-                  <DropdownMenuSeparator />
-                  <DropdownMenuLabel>Non-LLM Operations</DropdownMenuLabel>
-                  <DropdownMenuItem
-                    onClick={() =>
-                      handleAddOperation("non-LLM", "unnest", "Untitled Unnest")
-                    }
-                  >
-                    Unnest
-                  </DropdownMenuItem>
-                  <DropdownMenuItem
-                    onClick={() =>
-                      handleAddOperation("non-LLM", "split", "Untitled Split")
-                    }
-                  >
-                    Split
-                  </DropdownMenuItem>
-                  <DropdownMenuItem
-                    onClick={() =>
-                      handleAddOperation("non-LLM", "gather", "Untitled Gather")
-                    }
-                  >
-                    Gather
-                  </DropdownMenuItem>
-                  <DropdownMenuItem
-                    onClick={() =>
-                      handleAddOperation("non-LLM", "sample", "Untitled Sample")
-                    }
-                  >
-                    Sample
-                  </DropdownMenuItem>
-                </DropdownMenuContent>
-              </DropdownMenu>
-              <div className="flex space-x-2">
-                <TooltipProvider>
-                  <Tooltip>
-                    <TooltipTrigger asChild>
-                      <Button
-                        size="sm"
-                        className="rounded-sm"
-                        disabled={isLoadingOutputs}
-                        onClick={() => onRunAll(true)}
-                      >
-                        <RefreshCw size={16} className="mr-2" /> Clear and Run
-                      </Button>
-                    </TooltipTrigger>
-                    <TooltipContent>
-                      <p>The cache will be cleared before running</p>
-                    </TooltipContent>
-                  </Tooltip>
-                </TooltipProvider>
-                <TooltipProvider>
-                  <Tooltip>
-                    <TooltipTrigger asChild>
-                      <Button
-                        size="sm"
-                        className="rounded-sm"
-                        disabled={isLoadingOutputs}
-                        onClick={() => onRunAll(false)}
-                      >
-                        <Play size={16} className="mr-2" /> Run
-                      </Button>
-                    </TooltipTrigger>
-                    <TooltipContent>
-                      <p>This will use any cached outputs if applicable</p>
-                    </TooltipContent>
-                  </Tooltip>
-                </TooltipProvider>
-              </div>
+    <div className="h-[calc(70vh-2rem)] flex flex-col">
+      <div className="flex-none p-2 bg-white border-b sticky top-0 z-10">
+        <div className="flex justify-between items-center">
+          <div className="flex items-center space-x-2">
+            <h2 className="text-sm font-bold uppercase">
+              {pipelineName.toUpperCase()}
+            </h2>
+            {sampleSize && (
+              <TooltipProvider delayDuration={0}>
+                <Tooltip>
+                  <TooltipTrigger>
+                    <div className="flex items-center">
+                      <PieChart size={16} className="text-primary mr-2" />
+                      <span className="text-xs text-primary">
+                        {sampleSize} samples
+                      </span>
+                    </div>
+                  </TooltipTrigger>
+                  <TooltipContent className="max-w-[200px]">
+                    <p>
+                      Pipeline will run on a sample of {sampleSize} random
+                      documents.
+                    </p>
+                  </TooltipContent>
+                </Tooltip>
+              </TooltipProvider>
+            )}
+            <div className="flex p-0 space-x-0">
+              <TooltipProvider>
+                <Tooltip>
+                  <TooltipTrigger asChild>
+                    <Button
+                      variant="ghost"
+                      size="icon"
+                      onClick={() => fileInputRef.current?.click()}
+                    >
+                      <FileUp size={16} />
+                    </Button>
+                  </TooltipTrigger>
+                  <TooltipContent>
+                    <p>Initialize from config file</p>
+                  </TooltipContent>
+                </Tooltip>
+              </TooltipProvider>
+              <Input
+                type="file"
+                ref={fileInputRef}
+                onChange={handleFileUpload}
+                accept=".yaml,.yml"
+                className="hidden"
+              />
+              <TooltipProvider>
+                <Tooltip>
+                  <TooltipTrigger asChild>
+                    <Button
+                      size="icon"
+                      variant="ghost"
+                      onClick={() => handleExport()}
+                    >
+                      <Download size={16} />
+                    </Button>
+                  </TooltipTrigger>
+                  <TooltipContent>
+                    <p>Download pipeline config file</p>
+                  </TooltipContent>
+                </Tooltip>
+              </TooltipProvider>
+
+              <Button
+                size="icon"
+                variant="ghost"
+                onClick={() => setIsSettingsOpen(true)}
+              >
+                <Settings size={16} />
+              </Button>
             </div>
           </div>
-        </div>
-        <div className="p-2">
-          <DragDropContext onDragEnd={handleDragEnd}>
-            <Droppable droppableId="operations" type="operation">
-              {(provided, snapshot) => (
-                <div
-                  {...provided.droppableProps}
-                  ref={provided.innerRef}
-                  className={`space-y-2 ${
-                    snapshot.isDraggingOver ? "bg-gray-50" : ""
-                  }`}
+          <div className="flex space-x-2">
+            <DropdownMenu>
+              <DropdownMenuTrigger asChild>
+                <Button size="sm" className="rounded-sm">
+                  <Plus size={16} className="mr-2" /> Add Operation{" "}
+                  <ChevronDown size={16} className="ml-2" />
+                </Button>
+              </DropdownMenuTrigger>
+              <DropdownMenuContent>
+                <DropdownMenuLabel>LLM Operations</DropdownMenuLabel>
+                <DropdownMenuItem
+                  onClick={() =>
+                    handleAddOperation("LLM", "map", "Untitled Map")
+                  }
                 >
-                  {operations.map((op, index) => (
-                    <OperationCard key={op.id} index={index} />
-                  ))}
-                  {provided.placeholder}
-                </div>
-              )}
-            </Droppable>
-          </DragDropContext>
-        </div>
-        <Dialog open={isSettingsOpen} onOpenChange={setIsSettingsOpen}>
-          <DialogContent>
-            <DialogHeader>
-              <DialogTitle>Pipeline Settings</DialogTitle>
-            </DialogHeader>
-            <div className="grid gap-4 py-4">
-              <div className="grid grid-cols-4 items-center gap-4">
-                <Label htmlFor="name" className="text-right">
-                  Name
-                </Label>
-                <Input
-                  id="name"
-                  value={tempPipelineName}
-                  onChange={(e) => setTempPipelineName(e.target.value)}
-                  className="col-span-3"
-                />
-              </div>
-              <div className="grid grid-cols-4 items-center gap-4">
-                <Label htmlFor="sampling" className="text-right">
-                  Sample Size
-                </Label>
-                <Input
-                  id="sampling"
-                  type="number"
-                  value={tempSampleSize}
-                  onChange={(e) => setTempSampleSize(e.target.value)}
-                  placeholder="None"
-                  className="col-span-3"
-                />
-              </div>
-              <div className="grid grid-cols-4 items-center gap-4">
-                <Label htmlFor="currentFile" className="text-right">
-                  Dataset JSON
-                </Label>
-                <Select
-                  value={tempCurrentFile?.path || ""}
-                  onValueChange={(value) =>
-                    setTempCurrentFile(
-                      files.find((file) => file.path === value) || null
+                  Map
+                </DropdownMenuItem>
+                <DropdownMenuItem
+                  onClick={() =>
+                    handleAddOperation("LLM", "reduce", "Untitled Reduce")
+                  }
+                >
+                  Reduce
+                </DropdownMenuItem>
+                <DropdownMenuItem
+                  onClick={() =>
+                    handleAddOperation("LLM", "resolve", "Untitled Resolve")
+                  }
+                >
+                  Resolve
+                </DropdownMenuItem>
+                <DropdownMenuItem
+                  onClick={() =>
+                    handleAddOperation("LLM", "filter", "Untitled Filter")
+                  }
+                >
+                  Filter
+                </DropdownMenuItem>
+                <DropdownMenuItem
+                  onClick={() =>
+                    handleAddOperation(
+                      "LLM",
+                      "parallel_map",
+                      "Untitled Parallel Map"
                     )
                   }
                 >
-                  <SelectTrigger className="col-span-3">
-                    <SelectValue placeholder="Select a file" />
-                  </SelectTrigger>
-                  <SelectContent>
-                    {files.map((file) => (
-                      <SelectItem key={file.path} value={file.path}>
-                        {file.name}
-                      </SelectItem>
-                    ))}
-                  </SelectContent>
-                </Select>
-              </div>
-              <div className="grid grid-cols-4 items-center gap-4">
-                <Label htmlFor="defaultModel" className="text-right">
-                  Default Model
-                </Label>
-                <Input
-                  id="defaultModel"
-                  value={tempDefaultModel}
-                  onChange={(e) => setTempDefaultModel(e.target.value)}
-                  className="col-span-3"
-                />
-              </div>
+                  Parallel Map
+                </DropdownMenuItem>
+                <DropdownMenuSeparator />
+                <DropdownMenuLabel>Non-LLM Operations</DropdownMenuLabel>
+                <DropdownMenuItem
+                  onClick={() =>
+                    handleAddOperation("non-LLM", "unnest", "Untitled Unnest")
+                  }
+                >
+                  Unnest
+                </DropdownMenuItem>
+                <DropdownMenuItem
+                  onClick={() =>
+                    handleAddOperation("non-LLM", "split", "Untitled Split")
+                  }
+                >
+                  Split
+                </DropdownMenuItem>
+                <DropdownMenuItem
+                  onClick={() =>
+                    handleAddOperation("non-LLM", "gather", "Untitled Gather")
+                  }
+                >
+                  Gather
+                </DropdownMenuItem>
+                <DropdownMenuItem
+                  onClick={() =>
+                    handleAddOperation("non-LLM", "sample", "Untitled Sample")
+                  }
+                >
+                  Sample
+                </DropdownMenuItem>
+              </DropdownMenuContent>
+            </DropdownMenu>
+            <div className="flex space-x-2">
+              <Button
+                size="sm"
+                variant="destructive"
+                className="rounded-sm"
+                onClick={handleStop}
+                disabled={!isLoadingOutputs}
+              >
+                <StopCircle size={16} className="mr-2" />
+                Stop Pipeline
+              </Button>
+              <Button
+                size="sm"
+                className="rounded-sm"
+                onClick={() => onRunAll(true)}
+                disabled={isLoadingOutputs}
+              >
+                {isLoadingOutputs ? (
+                  <Loader2 size={16} className="mr-2 animate-spin" />
+                ) : (
+                  <RefreshCw size={16} className="mr-2" />
+                )}
+                Clear and Run
+              </Button>
+              <Button
+                size="sm"
+                className="rounded-sm"
+                disabled={isLoadingOutputs}
+                onClick={() => onRunAll(false)}
+              >
+                {isLoadingOutputs ? (
+                  <Loader2 size={16} className="mr-2 animate-spin" />
+                ) : (
+                  <Play size={16} className="mr-2" />
+                )}
+                Run
+              </Button>
             </div>
-            <DialogFooter>
-              <Button onClick={handleSettingsSave}>Save changes</Button>
-            </DialogFooter>
-          </DialogContent>
-        </Dialog>
+          </div>
+        </div>
+      </div>
+      <div className="flex-1 overflow-y-auto min-h-0 p-2">
+        <DragDropContext onDragEnd={handleDragEnd}>
+          <Droppable droppableId="operations" type="operation">
+            {(provided, snapshot) => (
+              <div
+                {...provided.droppableProps}
+                ref={provided.innerRef}
+                className={`space-y-2 ${
+                  snapshot.isDraggingOver ? "bg-gray-50" : ""
+                }`}
+              >
+                {operations.map((op, index) => (
+                  <OperationCard key={op.id} index={index} />
+                ))}
+                {provided.placeholder}
+              </div>
+            )}
+          </Droppable>
+        </DragDropContext>
       </div>
+      <Dialog open={isSettingsOpen} onOpenChange={setIsSettingsOpen}>
+        <DialogContent>
+          <DialogHeader>
+            <DialogTitle>Pipeline Settings</DialogTitle>
+          </DialogHeader>
+          <div className="grid gap-4 py-4">
+            <div className="grid grid-cols-4 items-center gap-4">
+              <Label htmlFor="name" className="text-right">
+                Name
+              </Label>
+              <Input
+                id="name"
+                value={tempPipelineName}
+                onChange={(e) => setTempPipelineName(e.target.value)}
+                className="col-span-3"
+              />
+            </div>
+            <div className="grid grid-cols-4 items-center gap-4">
+              <Label htmlFor="sampling" className="text-right">
+                Sample Size
+              </Label>
+              <Input
+                id="sampling"
+                type="number"
+                value={tempSampleSize}
+                onChange={(e) => setTempSampleSize(e.target.value)}
+                placeholder="None"
+                className="col-span-3"
+              />
+            </div>
+            <div className="grid grid-cols-4 items-center gap-4">
+              <Label htmlFor="currentFile" className="text-right">
+                Dataset JSON
+              </Label>
+              <Select
+                value={tempCurrentFile?.path || ""}
+                onValueChange={(value) =>
+                  setTempCurrentFile(
+                    files.find((file) => file.path === value) || null
+                  )
+                }
+              >
+                <SelectTrigger className="col-span-3">
+                  <SelectValue placeholder="Select a file" />
+                </SelectTrigger>
+                <SelectContent>
+                  {files.map((file) => (
+                    <SelectItem key={file.path} value={file.path}>
+                      {file.name}
+                    </SelectItem>
+                  ))}
+                </SelectContent>
+              </Select>
+            </div>
+            <div className="grid grid-cols-4 items-center gap-4">
+              <Label htmlFor="defaultModel" className="text-right">
+                Default Model
+              </Label>
+              <Input
+                id="defaultModel"
+                value={tempDefaultModel}
+                onChange={(e) => setTempDefaultModel(e.target.value)}
+                className="col-span-3"
+              />
+            </div>
+            <div className="grid grid-cols-4 items-center gap-4">
+              <Label htmlFor="optimize" className="text-right">
+                Optimizer Model
+              </Label>
+              <Select
+                value={tempOptimizerModel}
+                onValueChange={(value) => setTempOptimizerModel(value)}
+              >
+                <SelectTrigger className="col-span-3">
+                  <SelectValue placeholder="Select optimizer model" />
+                </SelectTrigger>
+                <SelectContent>
+                  <SelectItem value="gpt-4o">gpt-4o</SelectItem>
+                  <SelectItem value="gpt-4o-mini">gpt-4o-mini</SelectItem>
+                </SelectContent>
+              </Select>
+            </div>
+          </div>
+          <DialogFooter>
+            <Button onClick={handleSettingsSave}>Save changes</Button>
+          </DialogFooter>
+        </DialogContent>
+      </Dialog>
     </div>
   );
 };
diff --git a/website/src/components/operations/args.tsx b/website/src/components/operations/args.tsx
index 76f99bd8..abb8d9dc 100644
--- a/website/src/components/operations/args.tsx
+++ b/website/src/components/operations/args.tsx
@@ -17,21 +17,46 @@ import {
   TooltipProvider,
   TooltipTrigger,
 } from "../ui/tooltip";
+import { Switch } from "../ui/switch";
+import { Label } from "../ui/label";
 
-export const PromptInput: React.FC<{
+interface PromptInputProps {
   prompt: string;
   onChange: (value: string) => void;
-}> = React.memo(({ prompt, onChange }) => {
-  return (
-    <Textarea
-      placeholder="Enter prompt"
-      className="mb-1 rounded-sm text-sm font-mono"
-      rows={3}
-      value={prompt}
-      onChange={(e) => onChange(e.target.value)}
-    />
-  );
-});
+}
+
+/** Prompt editor that flags text lacking a Jinja2 `{{ ... }}` expression. */
+export const PromptInput: React.FC<PromptInputProps> = React.memo(
+  ({ prompt, onChange }) => {
+    // Computed once per render. The closing braces must follow the opening
+    // ones, so stray text such as "}} {{" is not accepted as a template.
+    const openAt = prompt.indexOf("{{");
+    const isTemplate = openAt !== -1 && prompt.includes("}}", openAt + 2);
+
+    return (
+      <>
+        <Textarea
+          placeholder="Enter prompt (must be a Jinja2 template)"
+          className={`mb-1 rounded-sm text-sm font-mono ${
+            isTemplate ? "" : "border-red-500"
+          }`}
+          rows={3}
+          value={prompt}
+          onChange={(e) => onChange(e.target.value)}
+        />
+        {!isTemplate && (
+          <div className="text-red-500 text-sm mb-1">
+            Prompt must contain Jinja2 template syntax {"{"}
+            {"{"} and {"}"}
+            {"}"}
+          </div>
+        )}
+      </>
+    );
+  }
+);
+
+PromptInput.displayName = "PromptInput";
 
 export const SchemaForm: React.FC<{
   schema: SchemaItem[];
@@ -80,8 +105,8 @@ export const SchemaForm: React.FC<{
                   value === "list"
                     ? { key: "0", type: "string" }
                     : value === "dict"
-                      ? [{ key: "", type: "string" }]
-                      : undefined,
+                    ? [{ key: "", type: "string" }]
+                    : undefined,
               });
             }}
           >
@@ -166,6 +191,101 @@ export const OutputSchema: React.FC<{
   );
 });
 
+/** Props accepted by the GleaningConfig editor. */
+export interface GleaningConfigProps {
+  /** Current settings; the type allows null. */
+  gleaning: { num_rounds: number; validation_prompt: string } | null;
+  /** Called with the edited settings whenever a field changes. */
+  onUpdate: (newGleaning: GleaningConfigProps["gleaning"]) => void;
+  /** When true, the rounds and validation-prompt fields are rendered. */
+  isExpanded: boolean;
+  /** Click handler for the section's header button. */
+  onToggle: () => void;
+}
+
+/** Collapsible editor for a gleaning config (rounds + validation prompt). */
+export const GleaningConfig: React.FC<GleaningConfigProps> = React.memo(
+  ({ gleaning, onUpdate, isExpanded, onToggle }) => {
+    // Defaults first, so an edit made while `gleaning` is null sends both fields.
+    const base = { num_rounds: 0, validation_prompt: "", ...gleaning };
+    // Typed values can bypass the input's min/max, so clamp to 0-5 here.
+    const toRounds = (raw: string) =>
+      Math.min(5, Math.max(0, parseInt(raw, 10) || 0));
+
+    return (
+      <div className="border-t border-primary">
+        <Button
+          variant="ghost"
+          size="sm"
+          onClick={onToggle}
+          className="w-full text-primary hover:bg-primary/10 flex justify-between items-center"
+        >
+          <div className="flex items-center gap-2">
+            <span>
+              Gleaning {base.num_rounds ? "(enabled)" : "(not enabled)"}
+            </span>
+            <TooltipProvider>
+              <Tooltip>
+                {/* asChild: a default <button> trigger can't nest in Button. */}
+                <TooltipTrigger asChild>
+                  <Info size={16} className="text-primary" />
+                </TooltipTrigger>
+                <TooltipContent className="max-w-md whitespace-normal break-words text-left">
+                  <p>
+                    Gleaning allows you to iteratively refine outputs through
+                    multiple rounds of validation and improvement.
+                  </p>
+                </TooltipContent>
+              </Tooltip>
+            </TooltipProvider>
+          </div>
+          <ChevronDown
+            size={16}
+            className={`transition-transform duration-200 ${
+              isExpanded ? "transform rotate-180" : ""
+            }`}
+          />
+        </Button>
+
+        {isExpanded && (
+          <div className="p-2">
+            <div className="grid grid-cols-8 gap-4">
+              <div className="col-span-1 space-y-2">
+                <Label htmlFor="num_rounds">Rounds</Label>
+                <Input
+                  id="num_rounds"
+                  type="number"
+                  min="0"
+                  max="5"
+                  value={base.num_rounds || 0}
+                  onChange={(e) =>
+                    onUpdate({ ...base, num_rounds: toRounds(e.target.value) })
+                  }
+                  className={gleaning?.num_rounds === 0 ? "border-red-500" : ""}
+                />
+              </div>
+
+              <div className="col-span-7 space-y-2">
+                <Label htmlFor="validation_prompt">Validation Prompt</Label>
+                <Textarea
+                  id="validation_prompt"
+                  value={base.validation_prompt || ""}
+                  onChange={(e) =>
+                    onUpdate({ ...base, validation_prompt: e.target.value })
+                  }
+                  className={base.validation_prompt ? "" : "border-red-500"}
+                />
+              </div>
+            </div>
+          </div>
+        )}
+      </div>
+    );
+  }
+);
+
+GleaningConfig.displayName = "GleaningConfig";
+
 export const Guardrails: React.FC<{
   guardrails: string[];
   onUpdate: (newGuardrails: string[]) => void;
diff --git a/website/src/components/operations/components.tsx b/website/src/components/operations/components.tsx
index 032a77b0..666bc17a 100644
--- a/website/src/components/operations/components.tsx
+++ b/website/src/components/operations/components.tsx
@@ -146,7 +146,7 @@ export const ReduceOperationComponent: React.FC<OperationComponentProps> = ({
                       <X size={12} />
                     </Button>
                   </div>
-                ),
+                )
               )}
               <Button
                 onClick={() => {
@@ -341,7 +341,7 @@ export const ResolveOperationComponent: React.FC<OperationComponentProps> = ({
                   <X size={12} />
                 </Button>
               </div>
-            ),
+            )
           )}
           <Button
             onClick={() => {
@@ -427,7 +427,9 @@ export const SplitOperationComponent: React.FC<OperationComponentProps> = ({
   };
 
   const addMethodKwarg = () => {
-    const newKey = `arg${Object.keys(operation.otherKwargs?.method_kwargs || {}).length + 1}`;
+    const newKey = `arg${
+      Object.keys(operation.otherKwargs?.method_kwargs || {}).length + 1
+    }`;
     handleMethodKwargsChange(newKey, "");
   };
 
@@ -538,7 +540,7 @@ export const SplitOperationComponent: React.FC<OperationComponentProps> = ({
                   <X size={16} />
                 </Button>
               </div>
-            ),
+            )
           )}
           <Button size="sm" variant="outline" onClick={addMethodKwarg}>
             <Plus size={16} className="mr-2" /> Add Argument
@@ -651,7 +653,7 @@ export const GatherOperationComponent: React.FC<OperationComponentProps> = ({
     section: "previous" | "next",
     subsection: "head" | "middle" | "tail",
     key: string,
-    value: any,
+    value: any
   ) => {
     const updatedPeripheralChunks = {
       ...(operation.otherKwargs?.peripheral_chunks || {}),
@@ -794,7 +796,7 @@ export const GatherOperationComponent: React.FC<OperationComponentProps> = ({
                         "previous",
                         subsection as "head" | "middle" | "tail",
                         "content_key",
-                        e.target.value,
+                        e.target.value
                       )
                     }
                     placeholder="Content key"
@@ -813,7 +815,7 @@ export const GatherOperationComponent: React.FC<OperationComponentProps> = ({
                           "previous",
                           subsection as "head" | "middle" | "tail",
                           "count",
-                          Number(e.target.value),
+                          Number(e.target.value)
                         )
                       }
                       placeholder="Count"
@@ -843,7 +845,7 @@ export const GatherOperationComponent: React.FC<OperationComponentProps> = ({
                         "next",
                         subsection as "head" | "middle" | "tail",
                         "content_key",
-                        e.target.value,
+                        e.target.value
                       )
                     }
                     placeholder="Content key"
@@ -862,7 +864,7 @@ export const GatherOperationComponent: React.FC<OperationComponentProps> = ({
                           "next",
                           subsection as "head" | "middle" | "tail",
                           "count",
-                          Number(e.target.value),
+                          Number(e.target.value)
                         )
                       }
                       placeholder="Count"
@@ -899,7 +901,7 @@ export const ParallelMapOperationComponent: React.FC<
     index: number,
     action: "add" | "remove" | "update",
     value?: string,
-    keyIndex?: number,
+    keyIndex?: number
   ) => {
     const updatedPrompts = [...(operation.otherKwargs?.prompts || [])];
     const currentOutputKeys = [...(updatedPrompts[index].output_keys || [])];
@@ -988,10 +990,10 @@ export const ParallelMapOperationComponent: React.FC<
                             index,
                             "update",
                             e.target.value,
-                            keyIndex,
+                            keyIndex
                           )
                         }
-                        className="w-32"
+                        className="w-48"
                       />
                       <Button
                         variant="ghost"
@@ -1001,7 +1003,7 @@ export const ParallelMapOperationComponent: React.FC<
                             index,
                             "remove",
                             undefined,
-                            keyIndex,
+                            keyIndex
                           )
                         }
                       >
@@ -1026,12 +1028,12 @@ export const ParallelMapOperationComponent: React.FC<
                     handlePromptChange(index, "model", e.target.value)
                   }
                   placeholder="Model"
-                  className="w-32 mt-1"
+                  className="w-48 mt-1"
                 />
               </div>
             </div>
           </div>
-        ),
+        )
       )}
       <Button onClick={addPrompt} size="sm">
         Add Prompt
@@ -1159,13 +1161,13 @@ export const SampleOperationComponent: React.FC<OperationComponentProps> = ({
               type="text"
               value={
                 operation.otherKwargs?.method_kwargs?.embedding_keys?.join(
-                  ", ",
+                  ", "
                 ) || ""
               }
               onChange={(e) =>
                 handleMethodKwargsChange(
                   "embedding_keys",
-                  e.target.value.split(", "),
+                  e.target.value.split(", ")
                 )
               }
               placeholder="Comma-separated list of keys"
@@ -1214,7 +1216,7 @@ export default function createOperationComponent(
   operation: Operation,
   onUpdate: (updatedOperation: Operation) => void,
   isSchemaExpanded: boolean,
-  onToggleSchema: () => void,
+  onToggleSchema: () => void
 ) {
   switch (operation.type) {
     case "reduce":
diff --git a/website/src/components/ui/progress.tsx b/website/src/components/ui/progress.tsx
new file mode 100644
index 00000000..4fc3b473
--- /dev/null
+++ b/website/src/components/ui/progress.tsx
@@ -0,0 +1,32 @@
+"use client"
+
+import * as React from "react"
+import * as ProgressPrimitive from "@radix-ui/react-progress"
+
+import { cn } from "@/lib/utils"
+
+// Horizontal progress bar built on the Radix Progress primitive.
+// `value` is forwarded to the root (so Radix can report it to assistive
+// technology) and, treated as 0 when missing, sets the indicator offset.
+const Progress = React.forwardRef<
+  React.ElementRef<typeof ProgressPrimitive.Root>,
+  React.ComponentPropsWithoutRef<typeof ProgressPrimitive.Root>
+>(({ className, value, ...props }, ref) => (
+  <ProgressPrimitive.Root
+    ref={ref}
+    value={value}
+    className={cn(
+      "relative h-2 w-full overflow-hidden rounded-full bg-primary/20",
+      className
+    )}
+    {...props}
+  >
+    <ProgressPrimitive.Indicator
+      className="h-full w-full flex-1 bg-primary transition-all"
+      style={{ transform: `translateX(-${100 - (value || 0)}%)` }}
+    />
+  </ProgressPrimitive.Root>
+))
+Progress.displayName = ProgressPrimitive.Root.displayName
+
+export { Progress }
diff --git a/website/src/contexts/PipelineContext.tsx b/website/src/contexts/PipelineContext.tsx
index 69c7e7a6..06104c9d 100644
--- a/website/src/contexts/PipelineContext.tsx
+++ b/website/src/contexts/PipelineContext.tsx
@@ -21,6 +21,13 @@ interface PipelineState {
   currentFile: File | null;
   output: OutputType | null;
   terminalOutput: string;
+  optimizerProgress: {
+    status: string;
+    progress: number;
+    shouldOptimize: boolean;
+    rationale: string;
+    validatorPrompt: string;
+  } | null;
   isLoadingOutputs: boolean;
   numOpRun: number;
   pipelineName: string;
@@ -28,6 +35,7 @@ interface PipelineState {
   files: File[];
   cost: number;
   defaultModel: string;
+  optimizerModel: string;
 }
 
 interface PipelineContextType extends PipelineState {
@@ -35,6 +43,15 @@ interface PipelineContextType extends PipelineState {
   setCurrentFile: React.Dispatch<React.SetStateAction<File | null>>;
   setOutput: React.Dispatch<React.SetStateAction<OutputType | null>>;
   setTerminalOutput: React.Dispatch<React.SetStateAction<string>>;
+  setOptimizerProgress: React.Dispatch<
+    React.SetStateAction<{
+      status: string;
+      progress: number;
+      shouldOptimize: boolean;
+      rationale: string;
+      validatorPrompt: string;
+    } | null>
+  >;
   setIsLoadingOutputs: React.Dispatch<React.SetStateAction<boolean>>;
   setNumOpRun: React.Dispatch<React.SetStateAction<number>>;
   setPipelineName: React.Dispatch<React.SetStateAction<string>>;
@@ -42,6 +59,7 @@ interface PipelineContextType extends PipelineState {
   setFiles: React.Dispatch<React.SetStateAction<File[]>>;
   setCost: React.Dispatch<React.SetStateAction<number>>;
   setDefaultModel: React.Dispatch<React.SetStateAction<string>>;
+  setOptimizerModel: React.Dispatch<React.SetStateAction<string>>;
   saveProgress: () => void;
   unsavedChanges: boolean;
   clearPipelineState: () => void;
@@ -193,6 +211,7 @@ export const PipelineProvider: React.FC<{ children: React.ReactNode }> = ({
       localStorageKeys.TERMINAL_OUTPUT_KEY,
       ""
     ),
+    optimizerProgress: null,
     isLoadingOutputs: loadFromLocalStorage(
       localStorageKeys.IS_LOADING_OUTPUTS_KEY,
       false
@@ -212,6 +231,10 @@ export const PipelineProvider: React.FC<{ children: React.ReactNode }> = ({
       localStorageKeys.DEFAULT_MODEL_KEY,
       "gpt-4o-mini"
     ),
+    optimizerModel: loadFromLocalStorage(
+      localStorageKeys.OPTIMIZER_MODEL_KEY,
+      "gpt-4o-mini"
+    ),
   }));
 
   const [unsavedChanges, setUnsavedChanges] = useState(false);
@@ -266,6 +289,10 @@ export const PipelineProvider: React.FC<{ children: React.ReactNode }> = ({
       localStorageKeys.DEFAULT_MODEL_KEY,
       JSON.stringify(stateRef.current.defaultModel)
     );
+    localStorage.setItem(
+      localStorageKeys.OPTIMIZER_MODEL_KEY,
+      JSON.stringify(stateRef.current.optimizerModel)
+    );
     setUnsavedChanges(false);
     console.log("Progress saved!");
   }, []);
@@ -310,6 +337,8 @@ export const PipelineProvider: React.FC<{ children: React.ReactNode }> = ({
       files: mockFiles,
       cost: 0,
       defaultModel: "gpt-4o-mini",
+      optimizerModel: "gpt-4o-mini",
+      optimizerProgress: null,
     });
     setUnsavedChanges(false);
     console.log("Pipeline state cleared!");
@@ -376,6 +405,14 @@ export const PipelineProvider: React.FC<{ children: React.ReactNode }> = ({
       (value) => setStateAndUpdate("defaultModel", value),
       [setStateAndUpdate]
     ),
+    setOptimizerModel: useCallback(
+      (value) => setStateAndUpdate("optimizerModel", value),
+      [setStateAndUpdate]
+    ),
+    setOptimizerProgress: useCallback(
+      (value) => setStateAndUpdate("optimizerProgress", value),
+      [setStateAndUpdate]
+    ),
     saveProgress,
     unsavedChanges,
     clearPipelineState,

From 58128e2e5b3f7158bb64ff8a947d2900c1f6effb Mon Sep 17 00:00:00 2001
From: Shreya Shankar <ss.shankar505@gmail.com>
Date: Sat, 2 Nov 2024 13:03:48 -0700
Subject: [PATCH 2/4] feat: add optimizer in the UI

---
 .../map_optimizer/plan_generators.py          |   2 -
 website/src/components/AnsiRenderer.tsx       |  19 ++-
 website/src/components/OperationCard.tsx      |   1 +
 website/src/components/Output.tsx             | 124 ++++++++++++------
 4 files changed, 100 insertions(+), 46 deletions(-)

diff --git a/docetl/optimizers/map_optimizer/plan_generators.py b/docetl/optimizers/map_optimizer/plan_generators.py
index a3c1f9bd..34757402 100644
--- a/docetl/optimizers/map_optimizer/plan_generators.py
+++ b/docetl/optimizers/map_optimizer/plan_generators.py
@@ -774,8 +774,6 @@ def _generate_chain_plans(
         Note:
             - This method is most effective when the original task has multiple output keys
               with dependencies between them.
-            - If the output schema has only one key, an empty dictionary is returned as
-              chain decomposition is not necessary.
             - The method uses the LLM to generate the chain of subtasks, ensuring that
               all output keys from the original task are covered.
         """
diff --git a/website/src/components/AnsiRenderer.tsx b/website/src/components/AnsiRenderer.tsx
index 937d240f..4e261333 100644
--- a/website/src/components/AnsiRenderer.tsx
+++ b/website/src/components/AnsiRenderer.tsx
@@ -1,6 +1,7 @@
 import React, { useEffect, useRef, useState } from "react";
 import Convert from "ansi-to-html";
 import { useWebSocket } from "@/contexts/WebSocketContext";
+import { useToast } from "@/hooks/use-toast";
 
 const convert = new Convert({
   fg: "#000",
@@ -25,6 +26,7 @@ const AnsiRenderer: React.FC<AnsiRendererProps> = ({
   const scrollRef = useRef<HTMLDivElement>(null);
   const [userInput, setUserInput] = useState("");
   const { sendMessage } = useWebSocket();
+  const { toast } = useToast();
 
   useEffect(() => {
     if (scrollRef.current) {
@@ -33,15 +35,22 @@ const AnsiRenderer: React.FC<AnsiRendererProps> = ({
   }, [text]);
 
   const handleSendMessage = () => {
-    sendMessage(userInput.trim());
-    setUserInput("");
+    const trimmedInput = userInput.trim();
+    if (trimmedInput) {
+      sendMessage(trimmedInput);
+      toast({
+        title: "Terminal input received",
+        description: `You sent: ${trimmedInput}`,
+      });
+      setUserInput("");
+    }
   };
 
   const isWebSocketClosed = readyState === WebSocket.CLOSED;
 
   return (
     <div
-      className={`flex flex-col w-full h-[620px] bg-black text-white font-mono rounded-lg overflow-hidden ${
+      className={`flex flex-col w-full h-[420px] bg-black text-white font-mono rounded-lg overflow-hidden ${
         isWebSocketClosed ? "opacity-50" : ""
       }`}
     >
@@ -49,8 +58,8 @@ const AnsiRenderer: React.FC<AnsiRendererProps> = ({
         ref={scrollRef}
         className="flex-grow overflow-auto p-4"
         style={{
-          height: "600px", // Reduced height to accommodate input field
-          maxHeight: "600px",
+          height: "400px", // Reduced height to accommodate input field
+          maxHeight: "400px",
         }}
       >
         <pre
diff --git a/website/src/components/OperationCard.tsx b/website/src/components/OperationCard.tsx
index 39a7b14f..d4095d74 100644
--- a/website/src/components/OperationCard.tsx
+++ b/website/src/components/OperationCard.tsx
@@ -126,6 +126,7 @@ const OperationHeader: React.FC<{
               size="sm"
               className="p-0.25 h-6 w-6"
               onClick={onOptimize}
+              disabled={disabled}
             >
               <Zap size={14} className="text-yellow-500" />
             </Button>
diff --git a/website/src/components/Output.tsx b/website/src/components/Output.tsx
index 91b55b3c..1c3e88c6 100644
--- a/website/src/components/Output.tsx
+++ b/website/src/components/Output.tsx
@@ -2,7 +2,7 @@ import React, { useState, useEffect, useMemo } from "react";
 import { ColumnType } from "@/components/ResizableDataTable";
 import ResizableDataTable from "@/components/ResizableDataTable";
 import { usePipelineContext } from "@/contexts/PipelineContext";
-import { Loader2, Download } from "lucide-react";
+import { Loader2, Download, ChevronDown } from "lucide-react";
 import { Button } from "@/components/ui/button";
 import { Progress } from "@/components/ui/progress";
 import BookmarkableText from "@/components/BookmarkableText";
@@ -24,51 +24,97 @@ export const ConsoleContent: React.FC = () => {
   const { readyState } = useWebSocket();
 
   return (
-    <div className="flex flex-col h-full w-full bg-black text-white font-mono rounded-lg overflow-hidden">
+    <div className="flex flex-col h-full w-full">
       {optimizerProgress && (
-        <div className="p-4 border-b border-gray-800 bg-gray-900">
-          <div className="flex items-center justify-between mb-2">
-            <div className="text-sm font-medium text-blue-400">
-              {optimizerProgress.status}
-            </div>
-            <div className="text-xs text-gray-400">
-              {Math.round(optimizerProgress.progress * 100)}%
-            </div>
-          </div>
-          <Progress
-            value={optimizerProgress.progress * 100}
-            className="w-full h-2 bg-gray-700"
+        <div className="mb-4 p-[6px] rounded-lg relative">
+          {/* Animated gradient border */}
+          <div
+            className="absolute inset-0 rounded-lg opacity-80"
+            style={{
+              background:
+                "linear-gradient(45deg, #60a5fa, #c084fc, #818cf8, #60a5fa, #60a5fa, #c084fc, #818cf8)",
+              backgroundSize: "300% 300%",
+              animation: "gradient 8s linear infinite",
+            }}
           />
-          {optimizerProgress.shouldOptimize && (
-            <div className="mt-4 space-y-4">
-              <div>
-                <div className="text-xs uppercase tracking-wider text-gray-400 mb-1">
-                  Optimizing because
-                </div>
-                <div className="text-sm text-gray-200 leading-relaxed">
-                  {optimizerProgress.rationale}
-                </div>
+
+          {/* Inner content container */}
+          <div className="relative rounded-lg p-4 bg-white">
+            <div className="flex items-center justify-between mb-2">
+              <div className="text-sm font-medium bg-gradient-to-r from-blue-500 to-purple-500 bg-clip-text text-transparent">
+                {optimizerProgress.status}
+              </div>
+              <div className="text-xs text-blue-600">
+                {Math.round(optimizerProgress.progress * 100)}%
               </div>
+            </div>
+            <div className="relative w-full h-2 bg-gray-100 rounded-full overflow-hidden">
+              <div
+                className="absolute top-0 left-0 h-full"
+                style={{
+                  width: `${optimizerProgress.progress * 100}%`,
+                  background:
+                    "linear-gradient(45deg, #60a5fa, #c084fc, #818cf8, #60a5fa, #60a5fa, #c084fc, #818cf8)",
+                  backgroundSize: "300% 300%",
+                  animation: "gradient 8s linear infinite",
+                }}
+              />
+            </div>
 
-              {optimizerProgress.validatorPrompt && (
-                <div>
-                  <div className="text-xs uppercase tracking-wider text-gray-400 mb-1">
-                    Using this prompt to find the best plan
-                  </div>
-                  <div className="text-sm text-gray-200 leading-relaxed whitespace-pre-wrap border-l-4 border-gray-600 pl-4 my-2 italic">
-                    {optimizerProgress.validatorPrompt}
+            {optimizerProgress.shouldOptimize && (
+              <div className="mt-4 space-y-4">
+                <details className="group">
+                  <summary className="cursor-pointer list-none">
+                    <div className="flex items-center">
+                      <div className="text-xs font-medium uppercase tracking-wider bg-gradient-to-r from-blue-500 to-purple-500 bg-clip-text text-transparent">
+                        Optimizing because
+                      </div>
+                      <ChevronDown className="w-4 h-4 ml-2 text-gray-500 transition-transform group-open:rotate-180" />
+                    </div>
+                  </summary>
+                  <div className="mt-1 text-sm text-gray-600">
+                    {optimizerProgress.rationale}
                   </div>
-                </div>
-              )}
-            </div>
-          )}
+                </details>
+
+                {optimizerProgress.validatorPrompt && (
+                  <details className="group">
+                    <summary className="cursor-pointer list-none">
+                      <div className="flex items-center">
+                        <div className="text-xs font-medium uppercase tracking-wider bg-gradient-to-r from-blue-500 to-purple-500 bg-clip-text text-transparent">
+                          Using this prompt to evaluate the best plan
+                        </div>
+                        <ChevronDown className="w-4 h-4 ml-2 text-gray-500 transition-transform group-open:rotate-180" />
+                      </div>
+                    </summary>
+                    <div className="mt-1 text-sm text-gray-600 whitespace-pre-wrap border-l-4 border-purple-300 pl-3 italic">
+                      {optimizerProgress.validatorPrompt}
+                    </div>
+                  </details>
+                )}
+              </div>
+            )}
+          </div>
         </div>
       )}
-      <AnsiRenderer
-        text={terminalOutput || ""}
-        readyState={readyState}
-        setTerminalOutput={setTerminalOutput}
-      />
+
+      <div className="flex-1">
+        <AnsiRenderer
+          text={terminalOutput || ""}
+          readyState={readyState}
+          setTerminalOutput={setTerminalOutput}
+        />
+      </div>
+
+      <style>
+        {`
+          @keyframes gradient {
+            0% { background-position: 0% 50%; }
+            50% { background-position: 100% 50%; }
+            100% { background-position: 0% 50%; }
+          }
+        `}
+      </style>
     </div>
   );
 };

From 6ccef71232d72c1b4a6113e02911dd3c84ec7834 Mon Sep 17 00:00:00 2001
From: Shreya Shankar <ss.shankar505@gmail.com>
Date: Sun, 3 Nov 2024 13:53:23 -0800
Subject: [PATCH 3/4] get UI to work for reduce optimizer

---
 docetl/optimizers/reduce_optimizer.py | 56 ++++++++++++++++-----------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/docetl/optimizers/reduce_optimizer.py b/docetl/optimizers/reduce_optimizer.py
index dcbc39fc..1d7c96c5 100644
--- a/docetl/optimizers/reduce_optimizer.py
+++ b/docetl/optimizers/reduce_optimizer.py
@@ -103,13 +103,16 @@ def optimize(
         )
 
         # Find the key with the longest value
-        longest_key = max(
-            op_config["reduce_key"], key=lambda k: len(str(input_data[0][k]))
-        )
-        sample_key = tuple(
-            input_data[0][k] if k == longest_key else input_data[0][k]
-            for k in op_config["reduce_key"]
-        )
+        if op_config["reduce_key"] == ["_all"]:
+            sample_key = tuple(["_all"])
+        else:
+            longest_key = max(
+                op_config["reduce_key"], key=lambda k: len(str(input_data[0][k]))
+            )
+            sample_key = tuple(
+                input_data[0][k] if k == longest_key else input_data[0][k]
+                for k in op_config["reduce_key"]
+            )
 
         # Render the prompt with a sample input
         prompt_template = Template(op_config["prompt"])
@@ -200,7 +203,7 @@ def optimize(
 
             return self._optimize_single_reduce(op_config, input_data, validator_prompt)
         else:
-            self.console.log("No improvements identified.")
+            self.console.log(f"No improvements identified; {validation_results}.")
             return [op_config], original_output, 0.0
 
     def _should_use_map(
@@ -1082,7 +1085,9 @@ def _validate_reduce_output(
         with ThreadPoolExecutor(max_workers=self.max_threads) as executor:
             futures = []
             for reduce_key, inputs in validation_inputs.items():
-                if isinstance(op_config["reduce_key"], list):
+                if op_config["reduce_key"] == ["_all"] or op_config["reduce_key"] == "_all":
+                    sample_output = output_data[0]
+                elif isinstance(op_config["reduce_key"], list):
                     sample_output = next(
                         (
                             item
@@ -1133,11 +1138,11 @@ def _validate_reduce_output(
                 parameters = {
                     "type": "object",
                     "properties": {
-                        "is_valid": {"type": "boolean"},
+                        "is_correct": {"type": "boolean"},
                         "issues": {"type": "array", "items": {"type": "string"}},
                         "suggestions": {"type": "array", "items": {"type": "string"}},
                     },
-                    "required": ["is_valid", "issues", "suggestions"],
+                    "required": ["is_correct", "issues", "suggestions"],
                 }
 
                 futures.append(
@@ -1156,9 +1161,11 @@ def _validate_reduce_output(
 
         # Determine if optimization is needed based on validation results
         invalid_count = sum(
-            1 for result in validation_results if not result["is_valid"]
+            1 for result in validation_results if not result["is_correct"]
+        )
+        needs_improvement = invalid_count > 1 or (
+            invalid_count == 1 and len(validation_results) == 1
         )
-        needs_improvement = invalid_count > 1
 
         return {
             "needs_improvement": needs_improvement,
@@ -1170,14 +1177,19 @@ def _create_validation_inputs(
     ) -> Dict[Any, List[Dict[str, Any]]]:
         # Group input data by reduce_key
         grouped_data = {}
-        for item in input_data:
-            if isinstance(reduce_key, list):
-                key = tuple(item[k] for k in reduce_key)
-            else:
-                key = item[reduce_key]
-            if key not in grouped_data:
-                grouped_data[key] = []
-            grouped_data[key].append(item)
+        if reduce_key == ["_all"]:
+            # Put all data in one group under a single key
+            grouped_data[("_all",)] = input_data
+        else:
+            # Group by reduce key(s) as before
+            for item in input_data:
+                if isinstance(reduce_key, list):
+                    key = tuple(item[k] for k in reduce_key)
+                else:
+                    key = item[reduce_key]
+                if key not in grouped_data:
+                    grouped_data[key] = []
+                grouped_data[key].append(item)
 
         # Select a fixed number of reduce keys
         selected_keys = random.sample(
@@ -1738,7 +1750,7 @@ def _evaluate_single_plan(
         valid_count = sum(
             1
             for result in validation_result["validation_results"]
-            if result["is_valid"]
+            if result["is_correct"]
         )
         score = valid_count / len(validation_result["validation_results"])
 

From 7ff7d64fb905881f3b213e1c00ded338f95706d4 Mon Sep 17 00:00:00 2001
From: Shreya Shankar <ss.shankar505@gmail.com>
Date: Sun, 3 Nov 2024 14:02:41 -0800
Subject: [PATCH 4/4] get UI to work for reduce optimizer

---
 docetl/optimizers/join_optimizer.py    |  2 +-
 server/app/routes/pipeline.py          | 20 +++++---------------
 website/src/components/PipelineGui.tsx |  4 ++++
 3 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/docetl/optimizers/join_optimizer.py b/docetl/optimizers/join_optimizer.py
index 10c76772..a1599f25 100644
--- a/docetl/optimizers/join_optimizer.py
+++ b/docetl/optimizers/join_optimizer.py
@@ -11,7 +11,7 @@
 
 from docetl.operations.equijoin import EquijoinOperation
 from docetl.operations.resolve import ResolveOperation
-from docetl.utils import completion_cost, extract_jinja_variables
+from docetl.utils import completion_cost, extract_jinja_variables, StageType
 
 
 class JoinOptimizer:
diff --git a/server/app/routes/pipeline.py b/server/app/routes/pipeline.py
index 38ec4e88..fe2a3c9a 100644
--- a/server/app/routes/pipeline.py
+++ b/server/app/routes/pipeline.py
@@ -71,24 +71,12 @@ async def run_pipeline():
                 )
 
                 if user_message == "kill":
-                    runner.console.print("Killing process...")
+                    runner.console.print("Stopping process...")
                     await websocket.send_json({
                         "type": "error",
-                        "message": "Killing process. Service will restart automatically."
+                        "message": "Process stopped by user request"
                     })
-                    # Close websocket cleanly
-                    await websocket.close()
-                    
-                    # Get current process ID
-                    pid = os.getpid()
-                    
-                    # Schedule the process to kill itself
-                    async def delayed_kill():
-                        await asyncio.sleep(0.5)  # Give time for websocket to close
-                        os.kill(pid, signal.SIGTERM)
-                    
-                    asyncio.create_task(delayed_kill())
-                    return
+                    raise Exception("Process stopped by user request")
 
                 # Process the user message and send it to the runner
                 runner.console.post_input(user_message)
@@ -148,3 +136,5 @@ async def delayed_kill():
         error_traceback = traceback.format_exc()
         print(f"Error occurred:\n{error_traceback}")
         await websocket.send_json({"type": "error", "data": str(e) + "\n" + error_traceback})
+    finally:
+        await websocket.close()
diff --git a/website/src/components/PipelineGui.tsx b/website/src/components/PipelineGui.tsx
index b8332cf9..def89e4c 100644
--- a/website/src/components/PipelineGui.tsx
+++ b/website/src/components/PipelineGui.tsx
@@ -511,6 +511,10 @@ const PipelineGUI: React.FC = () => {
   const handleStop = () => {
     sendMessage("kill");
     setRunningButtonType(null);
+
+    if (readyState === WebSocket.CLOSED && isLoadingOutputs) {
+      setIsLoadingOutputs(false);
+    }
   };
 
   return (