From 98f6f4ddf4805fdef0fec624edca474d8ffd7f6b Mon Sep 17 00:00:00 2001
From: Alexey Snigir <alexey_snigir@external.mckinsey.com>
Date: Tue, 24 Dec 2024 13:55:41 +0100
Subject: [PATCH 1/5] complex prompt, update report, improvements

---
 .github/workflows/test-score-vizro-ai.yml |  25 +-
 vizro-ai/hatch.toml                       |   2 +-
 vizro-ai/tests/score/prompts.py           | 105 +++++++++
 vizro-ai/tests/score/pytest.ini           |   6 +
 vizro-ai/tests/score/test_dashboard.py    | 273 ++++++++++------------
 5 files changed, 243 insertions(+), 168 deletions(-)
 create mode 100644 vizro-ai/tests/score/prompts.py
diff --git a/.github/workflows/test-score-vizro-ai.yml b/.github/workflows/test-score-vizro-ai.yml
index 813ecba8c..033b8fa0e 100644
--- a/.github/workflows/test-score-vizro-ai.yml
+++ b/.github/workflows/test-score-vizro-ai.yml
@@ -5,7 +5,13 @@ defaults:
     working-directory: vizro-ai
 
 on:
+  schedule:
+    - cron: "30 10 * * 1" # run every Monday at 10:30 UTC
   workflow_dispatch:
+  # Temporary, for development only: remove this pull_request trigger before merging.
+  pull_request:
+    branches:
+      - main
 
 env:
   PYTHONUNBUFFERED: 1
@@ -20,17 +26,8 @@ jobs:
       fail-fast: false
       matrix:
         config:
-          - python-version: "3.9"
-            hatch-env: all.py3.9
-          - python-version: "3.10"
-            hatch-env: all.py3.10
-          - python-version: "3.11"
-            hatch-env: all.py3.11
           - python-version: "3.12"
             hatch-env: all.py3.12
-          - python-version: "3.9"
-            hatch-env: lower-bounds
-            label: lower bounds
 
     steps:
       - uses: actions/checkout@v4
@@ -46,17 +43,9 @@ jobs:
       fail-fast: false
       matrix:
         config:
-          - python-version: "3.9"
-            hatch-env: all.py3.9
-          - python-version: "3.10"
-            hatch-env: all.py3.10
-          - python-version: "3.11"
-            hatch-env: all.py3.11
           - python-version: "3.12"
             hatch-env: all.py3.12
-          - python-version: "3.9"
-            hatch-env: lower-bounds
-            label: lower bounds
+
 
     steps:
       - uses: actions/checkout@v4
diff --git a/vizro-ai/hatch.toml b/vizro-ai/hatch.toml
index 384d15d57..371590661 100644
--- a/vizro-ai/hatch.toml
+++ b/vizro-ai/hatch.toml
@@ -49,7 +49,7 @@ prep-release = [
 pypath = "hatch run python -c 'import sys; print(sys.executable)'"
 test = "pytest tests {args}"
 test-integration = "pytest -vs --reruns 1 tests/integration --headless {args}"
-test-score = "pytest -vs --reruns 1 tests/score --headless {args}"
+test-score = "pytest -vs tests/score --headless {args}"
 test-unit = "pytest tests/unit {args}"
 test-unit-coverage = [
   "coverage run -m pytest tests/unit {args}",
diff --git a/vizro-ai/tests/score/prompts.py b/vizro-ai/tests/score/prompts.py
new file mode 100644
index 000000000..47858f694
--- /dev/null
+++ b/vizro-ai/tests/score/prompts.py
@@ -0,0 +1,105 @@
+easy_prompt = """
+I need a page with 1 table.
+The table shows the tech companies stock data.
+
+I need a second page showing 2 cards and one chart.
+The first card says 'The Gapminder dataset provides historical data on countries' development indicators.'
+The chart is an scatter plot showing life expectancy vs. GDP per capita by country.
+Life expectancy on the y axis, GDP per capita on the x axis, and colored by continent.
+The second card says 'Data spans from 1952 to 2007 across various countries.'
+The layout uses a grid of 3 columns and 2 rows.
+
+Row 1: The first row has three columns:
+The first column is occupied by the first card.
+The second and third columns are spanned by the chart.
+
+Row 2: The second row mirrors the layout of the first row with respect to chart,
+but the first column is occupied by the second card.
+
+Add a filter to filter the scatter plot by continent.
+Add a second filter to filter the chart by year.
+"""
+
+medium_prompt = """
+<Page 1>
+I need a page with 1 table and 1 line chart.
+The chart shows the stock price trends of GOOG and AAPL.
+The table shows the stock prices data details.
+
+<Page 2>
+I need a second page showing 1 card and 1 chart.
+The card says 'The Gapminder dataset provides historical data on countries' development indicators.'
+The chart is a scatter plot showing GDP per capita vs. life expectancy.
+GDP per capita on the x axis, life expectancy on the y axis, and colored by continent.
+Layout the card on the left and the chart on the right. The card takes 1/3 of the whole space on the left.
+The chart takes 2/3 of the whole space and is on the right.
+Add a filter to filter the scatter plot by continent.
+Add a second filter to filter the chart by year.
+
+<Page 3>
+This page displays the tips dataset. use two different charts to show data
+distributions. one chart should be a bar chart and the other should be a scatter plot.
+first chart is on the left and the second chart is on the right.
+Add a filter to filter data in the scatter plot by smoker.
+
+<Page 4>
+Create 3 cards on this page:
+1. The first card on top says "This page combines data from various sources
+ including tips, stock prices, and global indicators."
+2. The second card says "Insights from Gapminder dataset."
+3. The third card says "Stock price trends over time."
+
+Layout these 3 cards in this way:
+create a grid with 3 columns and 2 rows.
+Row 1: The first row has three columns:
+- The first column is empty.
+- The second and third columns span the area for card 1.
+
+Row 2: The second row also has three columns:
+- The first column is empty.
+- The second column is occupied by the area for card 2.
+- The third column is occupied by the area for card 3.
+    """
+
+
+complex_prompt = """
+<Page 1>
+I need a page with 1 table and 3 line charts.
+The chart shows the stock price trends of GOOG and AAPL.
+The table shows the stock prices data details.
+Add 3 filters to filter the line chart by companies.
+
+<Page 2>
+I need a second page showing 3 cards and 4 charts.
+The cards says 'The Gapminder dataset provides historical data on countries' development indicators.'
+The charts are the scatter plots showing GDP per capita vs. life expectancy.
+GDP per capita on the x axis, life expectancy on the y axis, and colored by continent.
+Layout the cards on the left and the chart on the right.
+Add a filter to filter the scatter plots by continent.
+Add a second filter to filter the charts by year.
+
+<Page 3>
+This page displays the tips dataset. use four different charts to show data
+distributions. one chart should be a bar chart. the other should be a scatter plot.
+next chart should be a line chart. last one should be an area plot.
+first and second charts are on the left and the third and fourth charts are on the right.
+Add a filter to filter data in every plot by smoker.
+
+<Page 4>
+Create 3 cards on this page:
+1. The first card on top says "This page combines data from various sources
+ including tips, stock prices, and global indicators."
+2. The second card says "Insights from Gapminder dataset."
+3. The third card says "Stock price trends over time."
+
+Layout these 3 cards in this way:
+create a grid with 3 columns and 2 rows.
+Row 1: The first row has three columns:
+- The first column is empty.
+- The second and third columns span the area for card 1.
+
+Row 2: The second row also has three columns:
+- The first column is empty.
+- The second column is occupied by the area for card 2.
+- The third column is occupied by the area for card 3.
+    """
\ No newline at end of file
diff --git a/vizro-ai/tests/score/pytest.ini b/vizro-ai/tests/score/pytest.ini
index 8b3381827..7f2efb67c 100644
--- a/vizro-ai/tests/score/pytest.ini
+++ b/vizro-ai/tests/score/pytest.ini
@@ -2,3 +2,9 @@
 markers =
     easy_dashboard: mark test with easy prompt for dashboard creation.
     medium_dashboard: mark test with medium prompt for dashboard creation.
+    complex_dashboard: mark test with complex prompt for dashboard creation.
+
+filterwarnings =
+    ignore::UserWarning
+    # Ignore deprecation warning until this is solved: https://github.com/plotly/dash/issues/2590:
+    ignore:HTTPResponse.getheader():DeprecationWarning
diff --git a/vizro-ai/tests/score/test_dashboard.py b/vizro-ai/tests/score/test_dashboard.py
index 53d2e9033..5c2bac14a 100644
--- a/vizro-ai/tests/score/test_dashboard.py
+++ b/vizro-ai/tests/score/test_dashboard.py
@@ -10,11 +10,11 @@
 import chromedriver_autoinstaller
 import pytest
 import vizro.plotly.express as px
+import numpy as np
 from vizro import Vizro
 
 from vizro_ai import VizroAI
-
-vizro_ai = VizroAI()
+from prompts import easy_prompt, medium_prompt, complex_prompt
 
 df1 = px.data.gapminder()
 df2 = px.data.stocks()
@@ -22,12 +22,12 @@
 
 
 @dataclass
-class Components:
+class Component:
     type: Literal["ag_grid", "card", "graph"]
 
 
 @dataclass
-class Controls:
+class Control:
     type: Literal["filter", "parameter"]
 
 
@@ -43,6 +43,7 @@ def logic(  # noqa: PLR0912, PLR0915
     model_name,
     dash_duo,
     prompt_tier,
+    prompt_text,
     config: dict,
 ):
     """Calculates all separate scores. Creates csv report.
@@ -52,6 +53,7 @@ def logic(  # noqa: PLR0912, PLR0915
         model_name: GenAI model name
         dash_duo: dash_duo fixture
         prompt_tier: complexity of the prompt
+        prompt_text: prompt text
         config: json config of the expected dashboard
 
     """
@@ -161,73 +163,40 @@ def logic(  # noqa: PLR0912, PLR0915
     pages_exist.extend(pages_num)
 
     # Every separate score has its own weight.
-    app_started_score = {"weight": 0.4, "score": app_started}
-    no_browser_console_errors_score = {"weight": 0.1, "score": no_browser_console_errors}
-    pages_score = {"weight": 0.2, "score": sum(pages_exist) / len(pages_exist)}
-    components_score = {"weight": 0.1, "score": sum(components_num) / len(components_num)}
-    component_types_score = {"weight": 0.1, "score": sum(components_types_names) / len(components_types_names)}
-    controls_score = {"weight": 0.1, "score": sum(controls_num) / len(controls_num)}
-    controls_types_score = {"weight": 0.1, "score": sum(controls_types_names) / len(controls_types_names)}
-
     scores = [
-        app_started_score,
-        no_browser_console_errors_score,
-        pages_score,
-        components_score,
-        component_types_score,
-        controls_score,
-        controls_types_score,
+        {"score_name": "app_started_score", "weight": 0.4, "score": app_started},
+        {"score_name": "no_browser_console_errors_score", "weight": 0.1, "score": no_browser_console_errors},
+        {"score_name": "pages_score", "weight": 0.2, "score": sum(pages_exist) / len(pages_exist)},
+        {"score_name": "components_score", "weight": 0.1, "score": sum(components_num) / len(components_num)},
+        {"score_name": "component_types_score", "weight": 0.1, "score": sum(components_types_names) / len(components_types_names)},
+        {"score_name": "controls_score", "weight": 0.1, "score": sum(controls_num) / len(controls_num)},
+        {"score_name": "controls_types_score", "weight": 0.1, "score": sum(controls_types_names) / len(controls_types_names)},
     ]
-    # total_weight should be equal to 1
-    total_weight = sum(score["weight"] for score in scores)
-    # If total_weight is not equal to 1, we're recalculating weights for every separate score
-    # and calculating final weighted_score for the created dashboard
-    if total_weight != 1:
-        scores = [{"weight": score["weight"] / total_weight, "score": score["score"]} for score in scores]
-    weighted_score = round(sum(score["weight"] * score["score"] for score in scores), 1)
 
-    # csv report creation
+    scores_values = np.array([score["score"] for score in scores])
+    weights = np.array([score["weight"] for score in scores])
+    weighted_score = np.average(scores_values, weights=weights)
 
-    data_rows = [
-        datetime.now(),
-        vizro_type,
-        branch,
-        python_version,
-        model_name,
-        prompt_tier,
-        weighted_score,
-        app_started_score["score"],
-        no_browser_console_errors_score["score"],
-        pages_score["score"],
-        components_score["score"],
-        component_types_score["score"],
-        controls_score["score"],
-        controls_types_score["score"],
-    ]
+    # csv report creation
+    data_rows = [datetime.now(), vizro_type, branch, python_version, model_name, prompt_tier, prompt_text, weighted_score]
+    data_rows.extend(score["score"] for score in scores)
 
     with open(f"{report_dir}/report_model_{model_name}_{vizro_type}.csv", "a", newline=""):
         with open(f"{report_dir}/report_model_{model_name}_{vizro_type}.csv", "r+", newline="") as csvfile:
             writer = csv.writer(csvfile, delimiter=",")
             first_line = csvfile.readline()
             if not first_line:
-                writer.writerow(
-                    [
+                header_rows = [
                         "timestamp",
                         "vizro_type",
                         "branch",
                         "python_version",
                         "model",
                         "prompt_tier",
-                        "weighted_score",
-                        "app_started_score",
-                        "no_browser_console_errors_score",
-                        "pages_score",
-                        "components_score",
-                        "component_types_score",
-                        "controls_score",
-                        "controls_types_score",
-                    ]
-                )
+                        "prompt_text",
+                        "weighted_score"]
+                header_rows.extend(score["score_name"] for score in scores)
+                writer.writerow(header_rows)
                 writer.writerow(data_rows)
             else:
                 writer.writerow(data_rows)
@@ -248,59 +217,40 @@ def logic(  # noqa: PLR0912, PLR0915
 @pytest.mark.easy_dashboard
 @pytest.mark.parametrize(
     "model_name",
-    ["gpt-4o-mini"],
-    ids=["gpt-4o-mini"],
-)
-@pytest.mark.filterwarnings("ignore::langchain_core._api.beta_decorator.LangChainBetaWarning")
-@pytest.mark.filterwarnings("ignore::UserWarning")
-@pytest.mark.filterwarnings("ignore:HTTPResponse.getheader()")
+    [
+        "gpt-4o-mini",
+        "claude-3-5-sonnet-latest",
+    ],
+    ids=[
+        "gpt-4o-mini",
+        "claude-3-5-sonnet-latest",
+    ])
 def test_easy_dashboard(dash_duo, model_name):
-    input_text = """
-    I need a page with 1 table.
-    The table shows the tech companies stock data.
-
-    I need a second page showing 2 cards and one chart.
-    The first card says 'The Gapminder dataset provides historical data on countries' development indicators.'
-    The chart is an scatter plot showing life expectancy vs. GDP per capita by country.
-    Life expectancy on the y axis, GDP per capita on the x axis, and colored by continent.
-    The second card says 'Data spans from 1952 to 2007 across various countries.'
-    The layout uses a grid of 3 columns and 2 rows.
-
-    Row 1: The first row has three columns:
-    The first column is occupied by the first card.
-    The second and third columns are spanned by the chart.
-
-    Row 2: The second row mirrors the layout of the first row with respect to chart,
-    but the first column is occupied by the second card.
-
-    Add a filter to filter the scatter plot by continent.
-    Add a second filter to filter the chart by year.
-    """
-
-    dashboard = vizro_ai.dashboard([df1, df2], input_text)
+    dashboard = VizroAI(model=model_name).dashboard([df1, df2], easy_prompt)
 
     logic(
         dashboard=dashboard,
         model_name=model_name,
         dash_duo=dash_duo,
         prompt_tier="easy",
+        prompt_text=easy_prompt.replace("\n", " "),
         config={
             "pages": [
                 {
                     "components": [
-                        Components(type="ag_grid"),
+                        Component(type="ag_grid"),
                     ],
                     "controls": [],
                 },
                 {
                     "components": [
-                        Components(type="card"),
-                        Components(type="card"),
-                        Components(type="graph"),
+                        Component(type="card"),
+                        Component(type="card"),
+                        Component(type="graph"),
                     ],
                     "controls": [
-                        Controls(type="filter"),
-                        Controls(type="filter"),
+                        Control(type="filter"),
+                        Control(type="filter"),
                     ],
                 },
             ],
@@ -312,93 +262,118 @@ def test_easy_dashboard(dash_duo, model_name):
 @pytest.mark.parametrize(
     "model_name",
     ["gpt-4o-mini"],
-    ids=["gpt-4o-mini"],
-)
-@pytest.mark.filterwarnings("ignore::langchain_core._api.beta_decorator.LangChainBetaWarning")
-@pytest.mark.filterwarnings("ignore::UserWarning")
-@pytest.mark.filterwarnings("ignore:HTTPResponse.getheader()")
+    ids=["gpt-4o-mini"])
 def test_medium_dashboard(dash_duo, model_name):
-    input_text = """
-    <Page 1>
-    I need a page with 1 table and 1 line chart.
-    The chart shows the stock price trends of GOOG and AAPL.
-    The table shows the stock prices data details.
-
-    <Page 2>
-    I need a second page showing 1 card and 1 chart.
-    The card says 'The Gapminder dataset provides historical data on countries' development indicators.'
-    The chart is a scatter plot showing GDP per capita vs. life expectancy.
-    GDP per capita on the x axis, life expectancy on the y axis, and colored by continent.
-    Layout the card on the left and the chart on the right. The card takes 1/3 of the whole space on the left.
-    The chart takes 2/3 of the whole space and is on the right.
-    Add a filter to filter the scatter plot by continent.
-    Add a second filter to filter the chart by year.
-
-    <Page 3>
-    This page displays the tips dataset. use two different charts to show data
-    distributions. one chart should be a bar chart and the other should be a scatter plot.
-    first chart is on the left and the second chart is on the right.
-    Add a filter to filter data in the scatter plot by smoker.
-
-    <Page 4>
-    Create 3 cards on this page:
-    1. The first card on top says "This page combines data from various sources
-     including tips, stock prices, and global indicators."
-    2. The second card says "Insights from Gapminder dataset."
-    3. The third card says "Stock price trends over time."
-
-    Layout these 3 cards in this way:
-    create a grid with 3 columns and 2 rows.
-    Row 1: The first row has three columns:
-    - The first column is empty.
-    - The second and third columns span the area for card 1.
-
-    Row 2: The second row also has three columns:
-    - The first column is empty.
-    - The second column is occupied by the area for card 2.
-    - The third column is occupied by the area for card 3.
-        """
-
-    dashboard = vizro_ai.dashboard([df1, df2, df3], input_text)
+    dashboard = VizroAI(model=model_name).dashboard([df1, df2, df3], medium_prompt)
 
     logic(
         dashboard=dashboard,
         model_name=model_name,
         dash_duo=dash_duo,
         prompt_tier="medium",
+        prompt_text=medium_prompt.replace("\n", " "),
         config={
             "pages": [
                 {
                     "components": [
-                        Components(type="ag_grid"),
-                        Components(type="graph"),
+                        Component(type="ag_grid"),
+                        Component(type="graph"),
                     ],
                     "controls": [],
                 },
                 {
                     "components": [
-                        Components(type="card"),
-                        Components(type="graph"),
+                        Component(type="card"),
+                        Component(type="graph"),
+                    ],
+                    "controls": [
+                        Control(type="filter"),
+                        Control(type="filter"),
+                    ],
+                },
+                {
+                    "components": [
+                        Component(type="graph"),
+                        Component(type="graph"),
+                    ],
+                    "controls": [
+                        Control(type="filter"),
+                    ],
+                },
+                {
+                    "components": [
+                        Component(type="card"),
+                        Component(type="card"),
+                        Component(type="card"),
+                    ],
+                    "controls": [],
+                },
+            ],
+        },
+    )
+
+
+@pytest.mark.complex_dashboard
+@pytest.mark.parametrize(
+    "model_name",
+    ["gpt-4o-mini"],
+    ids=["gpt-4o-mini"],
+)
+def test_complex_dashboard(dash_duo, model_name):
+    dashboard = VizroAI(model=model_name).dashboard([df1, df2, df3], complex_prompt)
+
+    logic(
+        dashboard=dashboard,
+        model_name=model_name,
+        dash_duo=dash_duo,
+        prompt_tier="complex",
+        prompt_text=complex_prompt.replace("\n", " "),
+        config={
+            "pages": [
+                {
+                    "components": [
+                        Component(type="ag_grid"),
+                        Component(type="graph"),
+                        Component(type="graph"),
+                        Component(type="graph"),
+                    ],
+                    "controls": [
+                        Control(type="filter"),
+                        Control(type="filter"),
+                        Control(type="filter")
+                    ],
+                },
+                {
+                    "components": [
+                        Component(type="card"),
+                        Component(type="card"),
+                        Component(type="card"),
+                        Component(type="graph"),
+                        Component(type="graph"),
+                        Component(type="graph"),
+                        Component(type="graph"),
                     ],
                     "controls": [
-                        Controls(type="filter"),
-                        Controls(type="filter"),
+                        Control(type="filter"),
+                        Control(type="filter"),
                     ],
                 },
                 {
                     "components": [
-                        Components(type="graph"),
-                        Components(type="graph"),
+                        Component(type="graph"),
+                        Component(type="graph"),
+                        Component(type="graph"),
+                        Component(type="graph"),
                     ],
                     "controls": [
-                        Controls(type="filter"),
+                        Control(type="filter"),
                     ],
                 },
                 {
                     "components": [
-                        Components(type="card"),
-                        Components(type="card"),
-                        Components(type="card"),
+                        Component(type="card"),
+                        Component(type="card"),
+                        Component(type="card"),
                     ],
                     "controls": [],
                 },

From 736254e8aa8d0b350e7d3236aea41f12ed958f73 Mon Sep 17 00:00:00 2001
From: Alexey Snigir <alexey_snigir@external.mckinsey.com>
Date: Tue, 24 Dec 2024 13:56:20 +0100
Subject: [PATCH 2/5] changelog

---
 ..._alexey_snigir_score_tests_improvements.md | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 vizro-ai/changelog.d/20241224_135602_alexey_snigir_score_tests_improvements.md

diff --git a/vizro-ai/changelog.d/20241224_135602_alexey_snigir_score_tests_improvements.md b/vizro-ai/changelog.d/20241224_135602_alexey_snigir_score_tests_improvements.md
new file mode 100644
index 000000000..7c0d58d4f
--- /dev/null
+++ b/vizro-ai/changelog.d/20241224_135602_alexey_snigir_score_tests_improvements.md
@@ -0,0 +1,48 @@
+<!--
+A new scriv changelog fragment.
+
+Uncomment the section that is right (remove the HTML comment wrapper).
+-->
+
+<!--
+### Highlights ✨
+
+- A bullet item for the Highlights ✨ category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Removed
+
+- A bullet item for the Removed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Added
+
+- A bullet item for the Added category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Changed
+
+- A bullet item for the Changed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Deprecated
+
+- A bullet item for the Deprecated category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Fixed
+
+- A bullet item for the Fixed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Security
+
+- A bullet item for the Security category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->

From 99d0fd7987ba8417fdd29ccc9348952d933fcf0c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 24 Dec 2024 12:57:28 +0000
Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .github/workflows/test-score-vizro-ai.yml |  1 -
 vizro-ai/tests/score/prompts.py           |  2 +-
 vizro-ai/tests/score/test_dashboard.py    | 58 ++++++++++++++---------
 3 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/.github/workflows/test-score-vizro-ai.yml b/.github/workflows/test-score-vizro-ai.yml
index 033b8fa0e..a124cbc6c 100644
--- a/.github/workflows/test-score-vizro-ai.yml
+++ b/.github/workflows/test-score-vizro-ai.yml
@@ -46,7 +46,6 @@ jobs:
           - python-version: "3.12"
             hatch-env: all.py3.12
 
-
     steps:
       - uses: actions/checkout@v4
 
diff --git a/vizro-ai/tests/score/prompts.py b/vizro-ai/tests/score/prompts.py
index 47858f694..b507f051c 100644
--- a/vizro-ai/tests/score/prompts.py
+++ b/vizro-ai/tests/score/prompts.py
@@ -102,4 +102,4 @@
 - The first column is empty.
 - The second column is occupied by the area for card 2.
 - The third column is occupied by the area for card 3.
-    """
\ No newline at end of file
+    """
diff --git a/vizro-ai/tests/score/test_dashboard.py b/vizro-ai/tests/score/test_dashboard.py
index 5c2bac14a..5681ee89d 100644
--- a/vizro-ai/tests/score/test_dashboard.py
+++ b/vizro-ai/tests/score/test_dashboard.py
@@ -8,13 +8,13 @@
 from typing import Literal
 
 import chromedriver_autoinstaller
+import numpy as np
 import pytest
 import vizro.plotly.express as px
-import numpy as np
+from prompts import complex_prompt, easy_prompt, medium_prompt
 from vizro import Vizro
 
 from vizro_ai import VizroAI
-from prompts import easy_prompt, medium_prompt, complex_prompt
 
 df1 = px.data.gapminder()
 df2 = px.data.stocks()
@@ -168,9 +168,17 @@ def logic(  # noqa: PLR0912, PLR0915
         {"score_name": "no_browser_console_errors_score", "weight": 0.1, "score": no_browser_console_errors},
         {"score_name": "pages_score", "weight": 0.2, "score": sum(pages_exist) / len(pages_exist)},
         {"score_name": "components_score", "weight": 0.1, "score": sum(components_num) / len(components_num)},
-        {"score_name": "component_types_score", "weight": 0.1, "score": sum(components_types_names) / len(components_types_names)},
+        {
+            "score_name": "component_types_score",
+            "weight": 0.1,
+            "score": sum(components_types_names) / len(components_types_names),
+        },
         {"score_name": "controls_score", "weight": 0.1, "score": sum(controls_num) / len(controls_num)},
-        {"score_name": "controls_types_score", "weight": 0.1, "score": sum(controls_types_names) / len(controls_types_names)},
+        {
+            "score_name": "controls_types_score",
+            "weight": 0.1,
+            "score": sum(controls_types_names) / len(controls_types_names),
+        },
     ]
 
     scores_values = np.array([score["score"] for score in scores])
@@ -178,7 +186,16 @@ def logic(  # noqa: PLR0912, PLR0915
     weighted_score = np.average(scores_values, weights=weights)
 
     # csv report creation
-    data_rows = [datetime.now(), vizro_type, branch, python_version, model_name, prompt_tier, prompt_text, weighted_score]
+    data_rows = [
+        datetime.now(),
+        vizro_type,
+        branch,
+        python_version,
+        model_name,
+        prompt_tier,
+        prompt_text,
+        weighted_score,
+    ]
     data_rows.extend(score["score"] for score in scores)
 
     with open(f"{report_dir}/report_model_{model_name}_{vizro_type}.csv", "a", newline=""):
@@ -187,14 +204,15 @@ def logic(  # noqa: PLR0912, PLR0915
             first_line = csvfile.readline()
             if not first_line:
                 header_rows = [
-                        "timestamp",
-                        "vizro_type",
-                        "branch",
-                        "python_version",
-                        "model",
-                        "prompt_tier",
-                        "prompt_text",
-                        "weighted_score"]
+                    "timestamp",
+                    "vizro_type",
+                    "branch",
+                    "python_version",
+                    "model",
+                    "prompt_tier",
+                    "prompt_text",
+                    "weighted_score",
+                ]
                 header_rows.extend(score["score_name"] for score in scores)
                 writer.writerow(header_rows)
                 writer.writerow(data_rows)
@@ -224,7 +242,8 @@ def logic(  # noqa: PLR0912, PLR0915
     ids=[
         "gpt-4o-mini",
         "claude-3-5-sonnet-latest",
-    ])
+    ],
+)
 def test_easy_dashboard(dash_duo, model_name):
     dashboard = VizroAI(model=model_name).dashboard([df1, df2], easy_prompt)
 
@@ -259,10 +278,7 @@ def test_easy_dashboard(dash_duo, model_name):
 
 
 @pytest.mark.medium_dashboard
-@pytest.mark.parametrize(
-    "model_name",
-    ["gpt-4o-mini"],
-    ids=["gpt-4o-mini"])
+@pytest.mark.parametrize("model_name", ["gpt-4o-mini"], ids=["gpt-4o-mini"])
 def test_medium_dashboard(dash_duo, model_name):
     dashboard = VizroAI(model=model_name).dashboard([df1, df2, df3], medium_prompt)
 
@@ -337,11 +353,7 @@ def test_complex_dashboard(dash_duo, model_name):
                         Component(type="graph"),
                         Component(type="graph"),
                     ],
-                    "controls": [
-                        Control(type="filter"),
-                        Control(type="filter"),
-                        Control(type="filter")
-                    ],
+                    "controls": [Control(type="filter"), Control(type="filter"), Control(type="filter")],
                 },
                 {
                     "components": [

From 34a7ccf9651776996f89597ea839ec0c4dafa303 Mon Sep 17 00:00:00 2001
From: Alexey Snigir <alexey_snigir@external.mckinsey.com>
Date: Fri, 27 Dec 2024 11:17:08 +0100
Subject: [PATCH 4/5] add anthropic creds

---
 .github/workflows/test-score-vizro-ai.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/test-score-vizro-ai.yml b/.github/workflows/test-score-vizro-ai.yml
index 033b8fa0e..29d11287b 100644
--- a/.github/workflows/test-score-vizro-ai.yml
+++ b/.github/workflows/test-score-vizro-ai.yml
@@ -66,6 +66,8 @@ jobs:
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_BASE_URL }}
           VIZRO_TYPE: pypi
           BRANCH: ${{ github.head_ref }}
           PYTHON_VERSION: ${{ matrix.config.python-version }}
@@ -77,6 +79,8 @@ jobs:
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_BASE_URL }}
           VIZRO_TYPE: local
           BRANCH: ${{ github.head_ref }}
           PYTHON_VERSION: ${{ matrix.config.python-version }}

From 7139fb5a7fbc29da75b388acd678586726c71783 Mon Sep 17 00:00:00 2001
From: Alexey Snigir <alexey_snigir@external.mckinsey.com>
Date: Fri, 27 Dec 2024 11:45:33 +0100
Subject: [PATCH 5/5] fix report aggregated

---
 .github/workflows/test-score-vizro-ai.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-score-vizro-ai.yml b/.github/workflows/test-score-vizro-ai.yml
index e41928ce8..15f64fd91 100644
--- a/.github/workflows/test-score-vizro-ai.yml
+++ b/.github/workflows/test-score-vizro-ai.yml
@@ -123,7 +123,7 @@ jobs:
       - name: Create one csv report
         run: |
           cd /home/runner/work/vizro/vizro/
-          head -n 1 Report-3.11-/report_model_gpt-4o-mini_pypi.csv > report-aggregated-${{ steps.date.outputs.date }}.csv && tail -n+2 -q */*.csv >> report-aggregated-${{ steps.date.outputs.date }}.csv
+          head -n 1 Report-3.12-/report_model_gpt-4o-mini_pypi.csv > report-aggregated-${{ steps.date.outputs.date }}.csv && tail -n+2 -q */*.csv >> report-aggregated-${{ steps.date.outputs.date }}.csv
           gawk -F, -i inplace 'FNR>1 {$1="${{ steps.date.outputs.date }}"} {print}' OFS=, report-aggregated-${{ steps.date.outputs.date }}.csv
 
       - name: Report artifacts