diff --git a/example-apps/chatbot-rag-app/.flaskenv b/example-apps/chatbot-rag-app/.flaskenv
deleted file mode 100644
index 88612f8d..00000000
--- a/example-apps/chatbot-rag-app/.flaskenv
+++ /dev/null
@@ -1,4 +0,0 @@
-FLASK_APP=api/app.py
-FLASK_RUN_PORT=4000
-# Production mode ensures we don't run into problems.
-FLASK_ENV=production
diff --git a/example-apps/chatbot-rag-app/Dockerfile b/example-apps/chatbot-rag-app/Dockerfile
index 317ebd69..bca2b036 100644
--- a/example-apps/chatbot-rag-app/Dockerfile
+++ b/example-apps/chatbot-rag-app/Dockerfile
@@ -1,4 +1,4 @@
-FROM node:20-alpine AS build-step
+FROM node:22-alpine AS build-step
 WORKDIR /app
 ENV PATH=/node_modules/.bin:$PATH
 COPY frontend ./frontend
@@ -28,7 +28,4 @@ COPY api ./api
 COPY data ./data
 EXPOSE 4000

-# The only thing different from running local is that in docker we need to
-# listen on all IPs, not just localhost.
-ENV FLASK_RUN_HOST=0.0.0.0
-CMD [ "flask", "run"]
+CMD [ "python", "api/app.py"]
diff --git a/example-apps/chatbot-rag-app/README.md b/example-apps/chatbot-rag-app/README.md
index 0419fae4..81e53614 100644
--- a/example-apps/chatbot-rag-app/README.md
+++ b/example-apps/chatbot-rag-app/README.md
@@ -26,11 +26,27 @@ use-cases. Visit the [Install Elasticsearch](https://www.elastic.co/search-labs/

 Once you decided your approach, edit your `.env` file accordingly.

-### Elasticsearch index and chat_history index
+### Running your own Elastic Stack with Docker

-By default, the app will use the `workplace-app-docs` index and the chat
-history index will be `workplace-app-docs-chat-history`. If you want to change
-these, edit `ES_INDEX` and `ES_INDEX_CHAT_HISTORY` entries in your `.env` file.
+If you'd like to start Elastic locally, you can use the provided
+[docker-compose-elastic.yml](docker-compose-elastic.yml) file. This starts
+Elasticsearch, Kibana, and APM Server and only requires Docker installed.
+
+Use docker compose to run Elastic stack in the background:
+
+```bash
+docker compose -f docker-compose-elastic.yml up --force-recreate -d
+```
+
+Then, you can view Kibana at http://localhost:5601/app/home#/
+
+If asked for a username and password, use username: elastic and password: elastic.
+
+Clean up when finished, like this:
+
+```bash
+docker compose -f docker-compose-elastic.yml down
+```

 ## Connecting to LLM

@@ -67,6 +83,12 @@ docker compose up --build --force-recreate
 *Note*: First time creating the index can fail on timeout. Wait a few minutes
 and retry.

+Clean up when finished, like this:
+
+```bash
+docker compose down
+```
+
 ### Run locally

 If you want to run this example with Python and Node.js, you need to do a few
@@ -95,9 +117,8 @@ correct packages installed:
 ```bash
 python3 -m venv .venv
 source .venv/bin/activate
-# install dev requirements for pip-compile and dotenv
-pip install pip-tools "python-dotenv[cli]"
-pip-compile
+# Install dotenv which is a portable way to load environment variables.
+pip install "python-dotenv[cli]"
 pip install -r requirements.txt
 ```

@@ -105,13 +126,7 @@ pip install -r requirements.txt
 First, ingest the data into elasticsearch:

 ```bash
-$ dotenv run -- flask create-index
-".elser_model_2" model not available, downloading it now
-Model downloaded, starting deployment
-Loading data from ./data/data.json
-Loaded 15 documents
-Split 15 documents into 26 chunks
-Creating Elasticsearch sparse vector store in http://localhost:9200
+FLASK_APP=api/app.py dotenv run -- flask create-index
 ```

 *Note*: First time creating the index can fail on timeout. Wait a few minutes
@@ -121,12 +136,33 @@ and retry.
 Now, run the app, which listens on http://localhost:4000

 ```bash
-$ dotenv run -- flask run
- * Serving Flask app 'api/app.py'
- * Debug mode: off
+dotenv run -- python api/app.py
 ```

-## Customizing the app
+## Advanced
+
+### Updating package versions
+
+To update package versions, recreate [requirements.txt](requirements.txt) and
+reinstall like this. Once checked in, any commands above will use updates.
+
+```bash
+rm -rf .venv
+python3 -m venv .venv
+source .venv/bin/activate
+# Install dev requirements for pip-compile
+pip install pip-tools
+# Recreate requirements.txt
+pip-compile
+# Install main dependencies
+pip install -r requirements.txt
+```
+
+### Elasticsearch index and chat_history index
+
+By default, the app will use the `workplace-app-docs` index and the chat
+history index will be `workplace-app-docs-chat-history`. If you want to change
+these, edit `ES_INDEX` and `ES_INDEX_CHAT_HISTORY` entries in your `.env` file.

 ### Indexing your own data

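The local run above ingests data with `flask create-index` into the `workplace-app-docs` index. If you want to confirm the ingest step worked independently of the app, you can ask Elasticsearch directly. A minimal sketch, assuming the `elasticsearch` Python client is available in the virtualenv and the local `elastic`/`elastic` credentials from the compose setup are in use:

```python
import os

from elasticsearch import Elasticsearch

# Defaults mirror the local setup in the README; override via .env / environment if needed.
es = Elasticsearch(
    os.getenv("ELASTICSEARCH_URL", "http://localhost:9200"),
    basic_auth=("elastic", "elastic"),
)
index = os.getenv("ES_INDEX", "workplace-app-docs")
if es.indices.exists(index=index):
    print(f"{index} contains {es.count(index=index)['count']} documents")
else:
    print(f"{index} is missing; re-run 'flask create-index'")
```
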
diff --git a/example-apps/chatbot-rag-app/api/app.py b/example-apps/chatbot-rag-app/api/app.py
index 826b6349..a94f4a24 100644
--- a/example-apps/chatbot-rag-app/api/app.py
+++ b/example-apps/chatbot-rag-app/api/app.py
@@ -37,6 +37,5 @@ def create_index():
     index_data.main()


-# Unless we run through flask, we can miss critical settings or telemetry signals.
 if __name__ == "__main__":
-    raise RuntimeError("Run via the parent directory: 'flask run'")
+    app.run(host="0.0.0.0", port=4000, debug=False)
diff --git a/example-apps/chatbot-rag-app/api/llm_integrations.py b/example-apps/chatbot-rag-app/api/llm_integrations.py
index ac34f1be..7da4bd60 100644
--- a/example-apps/chatbot-rag-app/api/llm_integrations.py
+++ b/example-apps/chatbot-rag-app/api/llm_integrations.py
@@ -11,60 +11,47 @@


 def init_openai_chat(temperature):
+    # Include streaming usage as this allows recording of LLM metrics
     return ChatOpenAI(
-        model=os.getenv("CHAT_MODEL"), streaming=True, temperature=temperature
+        model=os.getenv("CHAT_MODEL"),
+        streaming=True,
+        temperature=temperature,
+        model_kwargs={"stream_options": {"include_usage": True}},
     )


 def init_vertex_chat(temperature):
-    VERTEX_PROJECT_ID = os.getenv("VERTEX_PROJECT_ID")
-    VERTEX_REGION = os.getenv("VERTEX_REGION", "us-central1")
-    vertexai.init(project=VERTEX_PROJECT_ID, location=VERTEX_REGION)
-    return ChatVertexAI(streaming=True, temperature=temperature)
+    return ChatVertexAI(
+        model_name=os.getenv("CHAT_MODEL"), streaming=True, temperature=temperature
+    )


 def init_azure_chat(temperature):
+    # Include streaming usage as this allows recording of LLM metrics
     return AzureChatOpenAI(
-        model=os.getenv("CHAT_DEPLOYMENT"), streaming=True, temperature=temperature
+        model=os.getenv("CHAT_DEPLOYMENT"),
+        streaming=True,
+        temperature=temperature,
+        model_kwargs={"stream_options": {"include_usage": True}},
     )


 def init_bedrock(temperature):
-    AWS_ACCESS_KEY = os.getenv("AWS_ACCESS_KEY")
-    AWS_SECRET_KEY = os.getenv("AWS_SECRET_KEY")
-    AWS_REGION = os.getenv("AWS_REGION")
-    AWS_MODEL_ID = os.getenv("AWS_MODEL_ID", "anthropic.claude-v2")
     return ChatBedrock(
-        region_name=AWS_REGION,
-        aws_access_key_id=AWS_ACCESS_KEY,
-        aws_secret_access_key=AWS_SECRET_KEY,
-        model_id=AWS_MODEL_ID,
+        model_id=os.getenv("CHAT_MODEL"),
         streaming=True,
         model_kwargs={"temperature": temperature},
     )


 def init_mistral_chat(temperature):
-    MISTRAL_API_ENDPOINT = os.getenv("MISTRAL_API_ENDPOINT")
-    MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
-    MISTRAL_MODEL = os.getenv("MISTRAL_MODEL", "Mistral-large")
-    kwargs = {
-        "mistral_api_key": MISTRAL_API_KEY,
-        "temperature": temperature,
-    }
-    if MISTRAL_API_ENDPOINT:
-        kwargs["endpoint"] = MISTRAL_API_ENDPOINT
-    if MISTRAL_MODEL:
-        kwargs["model"] = MISTRAL_MODEL
-    return ChatMistralAI(**kwargs)
+    return ChatMistralAI(
+        model=os.getenv("CHAT_MODEL"), streaming=True, temperature=temperature
+    )


 def init_cohere_chat(temperature):
-    COHERE_API_KEY = os.getenv("COHERE_API_KEY")
-    COHERE_MODEL = os.getenv("COHERE_MODEL")
-    return ChatCohere(
-        cohere_api_key=COHERE_API_KEY, model=COHERE_MODEL, temperature=temperature
-    )
+    return ChatCohere(model=os.getenv("CHAT_MODEL"), temperature=temperature)


 MAP_LLM_TYPE_TO_CHAT_MODEL = {
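The refactor above standardizes each provider on a single `CHAT_MODEL` setting and, for the OpenAI-compatible chats, opts into streaming usage so token counts can be recorded. A rough sketch of exercising one of these constructors on its own, assuming `OPENAI_API_KEY` is set, `langchain-openai` is installed, and the script runs from the `api` directory; the model name here is only an example, not a value the app prescribes:

```python
import os

from llm_integrations import init_openai_chat  # helper defined in the diff above

os.environ.setdefault("CHAT_MODEL", "gpt-4o-mini")  # example model, adjust to your account

chat = init_openai_chat(temperature=0.2)
# Streaming yields message chunks; with include_usage enabled the final chunk
# also carries usage metadata (token counts) that telemetry can pick up.
for chunk in chat.stream("Say hello in one short sentence."):
    print(chunk.content, end="", flush=True)
print()
```
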
"Mistral-large") - kwargs = { - "mistral_api_key": MISTRAL_API_KEY, - "temperature": temperature, - } - if MISTRAL_API_ENDPOINT: - kwargs["endpoint"] = MISTRAL_API_ENDPOINT - if MISTRAL_MODEL: - kwargs["model"] = MISTRAL_MODEL - return ChatMistralAI(**kwargs) + return ChatMistralAI( + model=os.getenv("CHAT_MODEL"), streaming=True, temperature=temperature + ) def init_cohere_chat(temperature): - COHERE_API_KEY = os.getenv("COHERE_API_KEY") - COHERE_MODEL = os.getenv("COHERE_MODEL") - return ChatCohere( - cohere_api_key=COHERE_API_KEY, model=COHERE_MODEL, temperature=temperature - ) + return ChatCohere(model=os.getenv("CHAT_MODEL"), temperature=temperature) MAP_LLM_TYPE_TO_CHAT_MODEL = { diff --git a/example-apps/chatbot-rag-app/docker-compose-elastic.yml b/example-apps/chatbot-rag-app/docker-compose-elastic.yml new file mode 100644 index 00000000..6d2b0b8b --- /dev/null +++ b/example-apps/chatbot-rag-app/docker-compose-elastic.yml @@ -0,0 +1,91 @@ +name: elastic-stack + +services: + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:8.17.0 + container_name: elasticsearch + ports: + - 9200:9200 + environment: + - node.name=elasticsearch + - cluster.name=docker-cluster + - discovery.type=single-node + - ELASTIC_PASSWORD=elastic + - bootstrap.memory_lock=true + - xpack.security.enabled=true + - xpack.security.http.ssl.enabled=false + - xpack.security.transport.ssl.enabled=false + - xpack.license.self_generated.type=trial + - ES_JAVA_OPTS=-Xmx8g + ulimits: + memlock: + soft: -1 + hard: -1 + healthcheck: + test: ["CMD-SHELL", "curl -s http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=500ms"] + retries: 300 + interval: 1s + + elasticsearch_settings: + depends_on: + elasticsearch: + condition: service_healthy + image: docker.elastic.co/elasticsearch/elasticsearch:8.17.0 + container_name: elasticsearch_settings + restart: 'no' + command: > + bash -c ' + # gen-ai assistants in kibana save state in a way that requires security to be enabled, so we need to create + # a kibana system user before starting it. 
+ echo "Setup the kibana_system password"; + until curl -s -u "elastic:elastic" -X POST http://elasticsearch:9200/_security/user/kibana_system/_password -d "{\"password\":\"elastic\"}" -H "Content-Type: application/json" | grep -q "^{}"; do sleep 5; done; + ' + + kibana: + image: docker.elastic.co/kibana/kibana:8.17.0 + container_name: kibana + depends_on: + elasticsearch_settings: + condition: service_completed_successfully + ports: + - 5601:5601 + environment: + - SERVERNAME=kibana + - ELASTICSEARCH_HOSTS=http://elasticsearch:9200 + - ELASTICSEARCH_USERNAME=kibana_system + - ELASTICSEARCH_PASSWORD=elastic + # Non-default settings from here: + # https://github.com/elastic/apm-server/blob/main/testing/docker/kibana/kibana.yml + - MONITORING_UI_CONTAINER_ELASTICSEARCH_ENABLED=true + - XPACK_SECURITY_ENCRYPTIONKEY=fhjskloppd678ehkdfdlliverpoolfcr + - XPACK_ENCRYPTEDSAVEDOBJECTS_ENCRYPTIONKEY=fhjskloppd678ehkdfdlliverpoolfcr + - SERVER_PUBLICBASEURL=http://127.0.0.1:5601 + healthcheck: + test: ["CMD-SHELL", "curl -s http://localhost:5601/api/status | grep -q 'All services are available'"] + retries: 300 + interval: 1s + + apm-server: + image: docker.elastic.co/apm/apm-server:8.17.0 + container_name: apm-server + depends_on: + elasticsearch: + condition: service_healthy + command: > + apm-server + -E apm-server.kibana.enabled=true + -E apm-server.kibana.host=http://kibana:5601 + -E apm-server.kibana.username=elastic + -E apm-server.kibana.password=elastic + -E output.elasticsearch.hosts=["http://elasticsearch:9200"] + -E output.elasticsearch.username=elastic + -E output.elasticsearch.password=elastic + cap_add: ["CHOWN", "DAC_OVERRIDE", "SETGID", "SETUID"] + cap_drop: ["ALL"] + ports: + - 8200:8200 + healthcheck: + test: ["CMD-SHELL", "bash -c 'echo -n > /dev/tcp/127.0.0.1/8200'"] + retries: 300 + interval: 1s + diff --git a/example-apps/chatbot-rag-app/docker-compose.yml b/example-apps/chatbot-rag-app/docker-compose.yml index 634fa831..f6f2de0b 100644 --- a/example-apps/chatbot-rag-app/docker-compose.yml +++ b/example-apps/chatbot-rag-app/docker-compose.yml @@ -1,13 +1,20 @@ +name: chatbot-rag-app + services: ingest-data: build: context: . container_name: ingest-data restart: 'no' + environment: + # host.docker.internal means connect to the host machine, e.g. your laptop + ELASTICSEARCH_URL: "http://host.docker.internal:9200" + FLASK_APP: api/app.py env_file: - .env - - .flaskenv command: flask create-index + extra_hosts: + - "host.docker.internal:host-gateway" api-frontend: depends_on: @@ -16,8 +23,12 @@ services: container_name: api-frontend build: context: . + environment: + # host.docker.internal means connect to the host machine, e.g. 
diff --git a/example-apps/chatbot-rag-app/env.example b/example-apps/chatbot-rag-app/env.example
index d078293f..f060c491 100644
--- a/example-apps/chatbot-rag-app/env.example
+++ b/example-apps/chatbot-rag-app/env.example
@@ -28,24 +28,31 @@ ES_INDEX_CHAT_HISTORY=workplace-app-docs-chat-history

 # Uncomment and complete if you want to use Bedrock LLM
 # LLM_TYPE=bedrock
-# AWS_ACCESS_KEY=
-# AWS_SECRET_KEY=
-# AWS_REGION=
-# AWS_MODEL_ID=
+# AWS_ACCESS_KEY_ID=
+# AWS_SECRET_ACCESS_KEY=
+# AWS_DEFAULT_REGION=
+# CHAT_MODEL=anthropic.claude-3-5-sonnet-20240620-v1:0

 # Uncomment and complete if you want to use Vertex AI
 # LLM_TYPE=vertex
-# VERTEX_PROJECT_ID=
-# VERTEX_REGION=
+## Project that has the service "aiplatform.googleapis.com" enabled
+# GOOGLE_CLOUD_PROJECT=
+# GOOGLE_CLOUD_REGION=
+# CHAT_MODEL=gemini-1.5-flash-002
+## Needed if you haven't run `gcloud auth application-default login`
 # GOOGLE_APPLICATION_CREDENTIALS=

 # Uncomment and complete if you want to use Mistral AI
 # LLM_TYPE=mistral
+## Key in https://console.mistral.ai/api-keys/
 # MISTRAL_API_KEY=
-# MISTRAL_API_ENDPOINT=
-# MISTRAL_MODEL=
+## 'API Endpoints' from https://docs.mistral.ai/getting-started/models/models_overview/
+# CHAT_MODEL=open-mistral-nemo
+## Only set this if not using the default Mistral base URL
+# MISTRAL_BASE_URL=

 # Uncomment and complete if you want to use Cohere
 # LLM_TYPE=cohere
+## Key in https://dashboard.cohere.com/api-keys
 # COHERE_API_KEY=
-# COHERE_MODEL=
+# CHAT_MODEL=command-r7b-12-2024
diff --git a/example-apps/chatbot-rag-app/requirements.txt b/example-apps/chatbot-rag-app/requirements.txt
index 21fea6a1..2e2f2f04 100644
--- a/example-apps/chatbot-rag-app/requirements.txt
+++ b/example-apps/chatbot-rag-app/requirements.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.12
+# This file is autogenerated by pip-compile with Python 3.13
 # by the following command:
 #
 #    pip-compile
@@ -352,7 +352,6 @@ types-requests==2.32.0.20241016
     # via cohere
 typing-extensions==4.12.2
     # via
-    #   anyio
     #   cohere
     #   huggingface-hub
     #   langchain-core
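With every provider section above converging on `LLM_TYPE`, `CHAT_MODEL`, and a provider-specific credential, a small pre-flight check can catch an incomplete `.env` before starting the app. This is a hypothetical helper, not part of the app; it only uses variable names that appear in env.example:

```python
import os

# Variable names taken from env.example; the mapping itself is illustrative only.
REQUIRED_BY_LLM_TYPE = {
    "bedrock": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION", "CHAT_MODEL"],
    "vertex": ["GOOGLE_CLOUD_PROJECT", "GOOGLE_CLOUD_REGION", "CHAT_MODEL"],
    "mistral": ["MISTRAL_API_KEY", "CHAT_MODEL"],
    "cohere": ["COHERE_API_KEY", "CHAT_MODEL"],
}


def missing_settings(llm_type: str) -> list[str]:
    """Return the required settings that are not present in the environment."""
    return [name for name in REQUIRED_BY_LLM_TYPE.get(llm_type, []) if not os.getenv(name)]


if __name__ == "__main__":
    llm_type = os.getenv("LLM_TYPE", "openai")
    missing = missing_settings(llm_type)
    if missing:
        raise SystemExit(f"Missing settings for LLM_TYPE={llm_type}: {', '.join(missing)}")
    print(f"Environment looks complete for LLM_TYPE={llm_type}")
```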