diff --git a/.github/workflows/repo_scraper.yml b/.github/workflows/repo_scraper.yml index 34b896735..e1b12aac3 100644 --- a/.github/workflows/repo_scraper.yml +++ b/.github/workflows/repo_scraper.yml @@ -1,35 +1,36 @@ name: Run repo scraper on: - #push: - # branches: [main] #schedule: # - cron: "0 0 * * *" # Run at the end of every day workflow_dispatch: {} # manual executions jobs: scrape: - name: Run repo scraper runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Python uses: actions/setup-python@v5 with: python-version: 3.11 + cache: 'pip' + cache-dependency-path: setup.py + + - name: Auth with GCP + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.GCP_SA_KEY }} - name: Install dependencies run: | - python -m pip install --upgrade pip - pip install -r tools/repo_scraper/requirements.txt + pip install -r tools/repo_stats/requirements.txt pip list - name: Run repo scraper env: - DROPBOX_TOKEN: ${{ secrets.DROPBOX_TOKEN }} - DROPBOX_APP_KEY: ${{ secrets.DROPBOX_APP_KEY }} - DROPBOX_APP_SECRET: ${{ secrets.DROPBOX_APP_SECRET }} - DROPBOX_REFRESH_TOKEN: ${{ secrets.DROPBOX_REFRESH_TOKEN }} - GH_TOKEN: ${{ secrets.GH_TOKEN }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - python tools/repo_scraper/repo_scraper.py + python tools/repo_stats/scraper.py diff --git a/.gitignore b/.gitignore index 2552e7057..dd8897e76 100644 --- a/.gitignore +++ b/.gitignore @@ -156,10 +156,13 @@ node_modules/ latest_info.csv latest_repo_data.csv repo_data.csv +group_info.csv +repo_stats*.json student_repos/ reviews.csv bert_sentiment_model.pt prediction*.json +**/service_account_key.json # vscode .vscode/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0705e7efe..636cf94c0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,7 +20,7 @@ repos: hooks: # try to fix what is possible - id: ruff - args: ["--fix"] + args: ["--fix", "--unsafe-fixes"] # perform formatting updates - id: ruff-format # validate if all is fine with preview mode diff --git a/reports/report.py b/reports/report.py index f8ef337f4..2c8d8bee6 100644 --- a/reports/report.py +++ b/reports/report.py @@ -19,7 +19,7 @@ def no_constraints(answer, index) -> None: def length_constraints(answer, index, min_length, max_length) -> None: - """Either min or maximum length contrains for question.""" + """Either min or maximum length constrains for question.""" answer = answer.split() if not (min_length <= len(answer) <= max_length): warnings.warn( diff --git a/s3_reproducibility/docker.md b/s3_reproducibility/docker.md index 2fcd489d9..cd3615c6f 100644 --- a/s3_reproducibility/docker.md +++ b/s3_reproducibility/docker.md @@ -158,6 +158,14 @@ beneficial for you to download. docker rm ``` + In general we recommend to use the `--rm` flag when running a container e.g. + + ```bash + docker run --rm + ``` + + which will automatically remove the container after it has finished running. + 9. Let's now move on to trying to construct a Dockerfile ourselves for our MNIST project. Create a file called `trainer.dockerfile`. The intention is that we want to develop one Dockerfile for running our training script and one for doing predictions. @@ -303,7 +311,7 @@ beneficial for you to download. to start the image in interactive mode: ```bash - docker run -it --entrypoint sh {image_name}:{image_name} + docker run --rm -it --entrypoint sh {image_name}:{image_tag} ``` 16. 
When your training has completed you will notice that any files that are created when running your training script diff --git a/tools/leaderboard_app/leaderboard_app.py b/tools/leaderboard_app/leaderboard_app.py deleted file mode 100644 index bdee74854..000000000 --- a/tools/leaderboard_app/leaderboard_app.py +++ /dev/null @@ -1,151 +0,0 @@ -r"""Basic streamlit leaderboard app for showing data from scraped GitHub repos. - -Run with: - streamlit run tools\leaderboard_app\leaderboard_app.py -""" - -import ast -import os -import sys -from datetime import datetime - -import dropbox -import pandas as pd -import streamlit as st -from dotenv import load_dotenv -from dropbox.exceptions import AuthError - -st.set_page_config(layout="wide") - -if st.secrets.load_if_toml_exists(): - DROPBOX_TOKEN = st.secrets["DROPBOX_TOKEN"] - DROPBOX_APP_KEY = st.secrets["DROPBOX_APP_KEY"] - DROPBOX_APP_SECRET = st.secrets["DROPBOX_APP_SECRET"] - DROPBOX_REFRESH_TOKEN = st.secrets["DROPBOX_REFRESH_TOKEN"] -else: # load credentials from .env file - load_dotenv() - DROPBOX_TOKEN = os.getenv("DROPBOX_TOKEN") - DROPBOX_APP_KEY = os.getenv("DROPBOX_APP_KEY") - DROPBOX_APP_SECRET = os.getenv("DROPBOX_APP_SECRET") - DROPBOX_REFRESH_TOKEN = os.getenv("DROPBOX_REFRESH_TOKEN") - - -def download_data(filename: str) -> None: - """Download data from dropbox.""" - with dropbox.Dropbox( - oauth2_access_token=DROPBOX_TOKEN, - app_key=DROPBOX_APP_KEY, - app_secret=DROPBOX_APP_SECRET, - oauth2_refresh_token=DROPBOX_REFRESH_TOKEN, - ) as dbx: - try: - dbx.users_get_current_account() - except AuthError: - sys.exit("ERROR: Invalid access token; try re-generating an access token from the app console on the web.") - - dbx.files_download_to_file(filename, f"/{filename}") - - -def main() -> None: - """Streamlit application for showing group GitHub stats.""" - download_data("latest_repo_data.csv") - - df = pd.read_csv("latest_repo_data.csv") - - # convert to column - df["contributions_per_contributor"] = df["contributions_per_contributor"].apply( - lambda x: ast.literal_eval(x) if pd.notnull(x) else x, - ) - df["warnings_raised"] = df["warnings_raised"].apply(lambda x: 27 - x if pd.notnull(x) else x) - f = "%Y-%m-%dT%H:%M:%SZ" - df["latest_commit"] = df["latest_commit"].apply( - lambda x: datetime.strptime(x, f).astimezone(tz=datetime.UTC) if pd.notnull(x) else x, - ) - - # remove columns that are not needed - df1 = df[ - [ - "group_nb", - "num_students", - "num_contributors", - "total_commits", - "num_commits_to_main", - "contributions_per_contributor", - "num_prs", - "average_commit_message_length_to_main", - "average_commit_message_length", - "latest_commit", - ] - ] - - df2 = df[ - [ - "group_nb", - "num_docker_files", - "num_workflow_files", - "has_requirement_file", - "has_makefile", - "has_cloudbuild", - "repo_size", - "readme_size", - "using_dvc", - "warnings_raised", - ] - ] - - st.title("Group Github Stats") - st.text( - """ - Below is shown automatic scraped data for all groups in the course. None of these stats directly contribute - towards you passing the course or not. Instead they can inform how you are doing in comparison to other groups, - and it can indirectly inform the us about how well you are using version control for collaborating on your - project. 
- """, - ) - - st.header("Base statistics") - st.dataframe( - df1, - column_config={ - "group_nb": "Group Number", - "num_students": "Students", - "num_contributors": "Contributors", - "total_commits": "Total Commits", - "num_commits_to_main": "Commits to main", - "contributions_per_contributor": st.column_config.BarChartColumn("Contributions distribution"), - "num_prs": "Number of Pull Requests", - "average_commit_message_length_to_main": "ACML* (main)", - "average_commit_message_length": "ACML* (all)", - "latest_commit": st.column_config.DatetimeColumn("Latest commit"), - }, - hide_index=True, - ) - st.text("*ACML = Average Commit Message Length") - - st.header("Content statistics") - st.dataframe( - df2, - column_config={ - "group_nb": "Group Number", - "num_docker_files": "Docker files", - "num_workflow_files": "Workflow files", - "has_requirement_file": "Requirement file", - "has_makefile": "Makefile", - "has_cloudbuild": "Cloudbuild", - "repo_size": "Repository size", - "readme_size": "Readme size", - "using_dvc": "Using dvc", - "warnings_raised": st.column_config.ProgressColumn( - "Report completion", - help="Number of questions answered in exam report", - format="%d", - min_value=0, - max_value=27, - ), - }, - hide_index=True, - ) - - -if __name__ == "__main__": - main() diff --git a/tools/leaderboard_app/requirements.txt b/tools/leaderboard_app/requirements.txt deleted file mode 100644 index 141ddcee2..000000000 --- a/tools/leaderboard_app/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -streamlit >= 1.26.0 -dropbox >= 11.36.2 -python-dotenv >= 1.0.0 -pandas >= 2.0.3 diff --git a/tools/repo_scraper/repo_scraper.py b/tools/repo_scraper/repo_scraper.py deleted file mode 100644 index 3af0103e2..000000000 --- a/tools/repo_scraper/repo_scraper.py +++ /dev/null @@ -1,375 +0,0 @@ -"""Tool for scraping student repos for information. 
- -Run locally with (from root folder) - python tools/repo_scraper/repo_scraper.py -""" - -from __future__ import annotations - -import csv -import datetime -import os -import shutil -import sys -from pathlib import Path -from subprocess import PIPE, Popen - -import dropbox -import requests -from dotenv import load_dotenv -from dropbox.exceptions import AuthError - -load_dotenv() -DROPBOX_TOKEN = os.getenv("DROPBOX_TOKEN") -DROPBOX_APP_KEY = os.getenv("DROPBOX_APP_KEY") -DROPBOX_APP_SECRET = os.getenv("DROPBOX_APP_SECRET") -DROPBOX_REFRESH_TOKEN = os.getenv("DROPBOX_REFRESH_TOKEN") -GH_TOKEN = os.getenv("GITHUB_TOKEN") or os.getenv("GH_TOKEN") -headers = {"Authorization": f"Bearer {GH_TOKEN}"} - - -def process_data(data: list[list[str]]): - """Process the data from the csv file.""" - # remove empty emails - new_data = [] - for group in data: - group[0] = int(group[0]) # convert group number to int - new_data.append([group[0], len([g for g in group[1:-1] if g != ""]), group[-1]]) - return new_data - - -def load_data(filename: str) -> list[list[str]]: - """Load the data from the csv file.""" - with open("latest_info.csv") as f: - csv_reader = csv.reader(f, delimiter=",") - content = [] - for row in csv_reader: - content.append(row) - - header = content.pop(0) - return process_data(content) - - -def download_data(filename: str) -> None: - """Download specific file from dropbox.""" - with dropbox.Dropbox( - oauth2_access_token=DROPBOX_TOKEN, - app_key=DROPBOX_APP_KEY, - app_secret=DROPBOX_APP_SECRET, - oauth2_refresh_token=DROPBOX_REFRESH_TOKEN, - ) as dbx: - try: - dbx.users_get_current_account() - except AuthError: - sys.exit("ERROR: Invalid access token; try re-generating an access token from the app console on the web.") - - dbx.files_download_to_file(filename, f"/{filename}") - - -def upload_data(filename: str) -> None: - """Upload specific file to dropbox.""" - with dropbox.Dropbox( - oauth2_access_token=DROPBOX_TOKEN, - app_key=DROPBOX_APP_KEY, - app_secret=DROPBOX_APP_SECRET, - oauth2_refresh_token=DROPBOX_REFRESH_TOKEN, - ) as dbx: - try: - dbx.users_get_current_account() - except AuthError: - sys.exit("ERROR: Invalid access token; try re-generating an access token from the app console on the web.") - - now = datetime.datetime.now(tz=datetime.UTC).strftime("%Y_%m_%d_%H_%M_%S") - with open(filename, "rb") as f: - dbx.files_upload(f.read(), f"/{now}_{filename}") - with open(filename, "rb") as f: - dbx.files_upload(f.read(), f"/latest_{filename}", mode=dropbox.files.WriteMode.overwrite) - - -def reformat_repo(repo: str) -> str: - """Extract from the url the user id and repository name only.""" - split = repo.split("/") - return f"{split[-2]}/{split[-1]}" - - -def get_default_branch_name(repo: str) -> str: - """Get the default branch name of a GitHub repo.""" - response = requests.get(f"https://api.github.com/repos/{repo}", headers=headers, timeout=100) - return response.json()["default_branch"] - - -def get_content(branch: str, url: str, repo: str, current_path: str) -> None: - """Recursively download content from a GitHub repo.""" - response = requests.get(url, headers=headers, timeout=100) - for file in response.json(): - if file["type"] == "dir": - folder = file["name"] - os.system(f"cd {current_path} & mkdir {folder}") - get_content(branch, f"{url}/{folder}", repo, f"{current_path}/{folder}") - else: - path = file["path"] - os.system(f"cd {current_path} & curl -s -OL https://raw.githubusercontent.com/{repo}/{branch}/{path}") - - -def get_content_recursive(url): - """Extract all 
content from a GitHub repo recursively.""" - all_content = [] - content = requests.get(url, headers=headers, timeout=10).json() - for c in content: - if "type" not in c: - continue - if c["type"] == "dir": - all_content += get_content_recursive(f"{url}/{c['name']}") - else: - all_content.append(c) - return all_content - - -def write_to_file(filename, row, mode="a") -> None: - """Write to a local csv file.""" - with open(filename, mode=mode, newline="") as f: - writer = csv.writer(f, delimiter=",") - writer.writerow(row) - - -def clone_repos(formatted_data, out_folder, timeout_clone) -> None: - """Clone the repos of the students.""" - print("====== Cloning repos ======") - for index, data in enumerate(formatted_data): - group_nb, _, repo = data - print(f"Cloning group {group_nb}, {index}/{len(formatted_data)}") - out = os.system(f"cd {out_folder} && timeout -v {timeout_clone} git clone -q {repo}") - clone_success = out == 0 - folder_name = repo.split("/")[-1] - if clone_success: - os.system(f"cd {out_folder} && cp -r {folder_name} group_{group_nb} && rm -rf {folder_name}") - else: - if folder_name in os.listdir(out_folder): - shutil.rmtree(f"{out_folder}/{folder_name}") - data.append(clone_success) - - -def extract_prs(repo: str) -> list[dict]: - """Extract all PRs from a GitHub repo.""" - prs = [] - page_counter = 1 - while True: - prs_page = requests.get( - f"https://api.github.com/repos/{repo}/pulls", - headers=headers, - params={"state": "all", "page": page_counter, "per_page": 100}, - timeout=100, - ).json() - if len(prs_page) == 0: - break - page_counter += 1 - prs += prs_page - return prs - - -def extract_commits(repo: str) -> list[dict]: - """Extract all commits from a GitHub repo.""" - commits = [] - page_counter = 1 - while True: - commits_page = requests.get( - f"https://api.github.com/repos/{repo}/commits", - headers=headers, - params={"state": "all", "page": page_counter, "per_page": 100}, - timeout=100, - ).json() - if len(commits_page) == 0: - break - page_counter += 1 - commits += commits_page - return commits - - -def get_stats_from_content(repo: str): - """Extract stats from the content of a GitHub repo.""" - content = get_content_recursive(f"https://api.github.com/repos/{repo}/contents") - docker_files = [c for c in content if c["name"] == "Dockerfile" or ".dockerfile" in c["name"]] - num_docker_files = len(docker_files) - workflow_files = [c for c in content if c["path"].startswith(".github/workflows")] - num_workflow_files = len(workflow_files) - has_requirement_file = len([c for c in content if c["name"] == "requirements.txt"]) > 0 - has_makefile = len([c for c in content if c["name"] == "Makefile"]) > 0 - has_cloudbuild = len([c for c in content if "cloudbuild.yaml" in c["name"]]) > 0 - return num_docker_files, num_workflow_files, has_requirement_file, has_makefile, has_cloudbuild - - -def check_report(out_folder: str, group_nb: int) -> None | int: - """Check how many questions are answered in the report.""" - warnings_raised = None - if "reports" in os.listdir(f"{out_folder}/group_{group_nb}"): - report_dir = os.listdir(f"{out_folder}/group_{group_nb}/reports") - if "README.md" in report_dir and "report.py" in report_dir: - p = Popen( - ["python", "report.py", "check"], - cwd=f"{out_folder}/group_{group_nb}/reports", - stdout=PIPE, - stderr=PIPE, - stdin=PIPE, - ) - output = p.stderr.read() - warnings_raised = len(output.decode("utf-8").split("\n")[:-1:2]) - - -def main( - out_folder: str = "student_repos", - timeout_clone: str = "2m", -) -> None: - """Extract group 
statistics from github.""" - print("Getting the repository information") - if "latest_info.csv" not in os.listdir(): - download_data("latest_info.csv") - formatted_data = load_data("latest_info.csv") - - # loop for scraping the repository of each group - print("Cleaning out old data if needed") - if os.path.isdir(out_folder): # non-empty folder, delete content - shutil.rmtree(out_folder) - os.makedirs(out_folder) - - # clone repos - clone_repos(formatted_data, out_folder, timeout_clone) - - # create file for data - write_to_file( - "repo_data.csv", - [ - "group_nb", - "num_students", - "num_contributors", - "num_prs", - "num_commits_to_main", - "average_commit_message_length_to_main", - "latest_commit", - "average_commit_message_length", - "contributions_per_contributor", - "total_commits", - "num_docker_files", - "num_workflow_files", - "has_requirement_file", - "has_makefile", - "has_cloudbuild", - "repo_size", - "readme_size", - "using_dvc", - "warnings_raised", - ], - mode="w", - ) - - # extract info through API - print("====== Extracting info through API ======") - for index, (group_nb, num_students, repo, clone_success) in enumerate(formatted_data): - print(f"Processing group {group_nb}, {index}/{len(formatted_data)}") - repo = reformat_repo(repo) - exists = requests.get(f"https://api.github.com/repos/{repo}", headers=headers, timeout=100) - if exists.status_code == 200: - contributors = requests.get( - f"https://api.github.com/repos/{repo}/contributors", - headers=headers, - timeout=100, - ).json() - contributors = {c["login"]: {"contributions": c["contributions"], "commits_pr": 0} for c in contributors} - num_contributors = len(contributors) - - prs = extract_prs(repo) - num_prs = len(prs) - - commits = extract_commits(repo) - num_commits_to_main = len(commits) - commit_messages = [c["commit"]["message"] for c in commits] - average_commit_message_length_to_main = sum([len(c) for c in commit_messages]) / len(commit_messages) - latest_commit = commits[0]["commit"]["author"]["date"] - - merged_prs = [p["number"] for p in prs if p["merged_at"] is not None] - for pr_num in merged_prs: - pr_commits = requests.get( - f"https://api.github.com/repos/{repo}/pulls/{pr_num}/commits", - headers=headers, - params={"state": "all", "per_page": 100}, - timeout=100, - ).json() - commit_messages += [c["commit"]["message"] for c in pr_commits] - for comm in pr_commits: - if ( - comm["committer"] is not None - and "login" in comm["committer"] - and comm["committer"]["login"] in contributors - ): - contributors[comm["committer"]["login"]]["commits_pr"] += 1 - average_commit_message_length = sum([len(c) for c in commit_messages]) / len(commit_messages) - - contributions_per_contributor = [c["contributions"] + c["commits_pr"] for c in contributors.values()] - total_commits = sum(contributions_per_contributor) - - stats = get_stats_from_content(repo) - num_docker_files, num_workflow_files, has_requirement_file, has_makefile, has_cloudbuild = stats - else: - num_contributors = None - num_prs = None - num_commits_to_main = None - average_commit_message_length_to_main = None - latest_commit = None - average_commit_message_length = None - contributions_per_contributor = None - total_commits = None - num_docker_files = None - num_workflow_files = None - has_requirement_file = None - has_makefile = None - has_cloudbuild = None - - if clone_success: - path = Path(f"{out_folder}/group_{group_nb}") - repo_size = sum([f.stat().st_size for f in path.glob("**/*") if f.is_file()]) / 1_048_576 # in MB - - if "README.md" 
in os.listdir(f"{out_folder}/group_{group_nb}"): - with open(f"{out_folder}/group_{group_nb}/README.md") as f: - content = f.read() - readme_size = len(content.split(" ")) - else: - readme_size = None - - using_dvc = ".dvc" in os.listdir(f"{out_folder}/group_{group_nb}") - warnings_raised = check_report(out_folder, group_nb) - - else: - repo_size = None - readme_size = None - using_dvc = None - warnings_raised = None - - write_to_file( - "repo_data.csv", - [ - group_nb, - num_students, - num_contributors, - num_prs, - num_commits_to_main, - average_commit_message_length_to_main, - latest_commit, - average_commit_message_length, - contributions_per_contributor, - total_commits, - num_docker_files, - num_workflow_files, - has_requirement_file, - has_makefile, - has_cloudbuild, - repo_size, - readme_size, - using_dvc, - warnings_raised, - ], - mode="a", - ) - upload_data("repo_data.csv") - - -if __name__ == "__main__": - main() diff --git a/tools/repo_scraper/requirements.txt b/tools/repo_scraper/requirements.txt deleted file mode 100644 index dc75d742b..000000000 --- a/tools/repo_scraper/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -dropbox >= 11.36.2 -python-dotenv >= 1.0.0 -markdown >= 3.5.1 -click >= 8.1.7 diff --git a/tools/repo_stats/Dockerfile b/tools/repo_stats/Dockerfile new file mode 100644 index 000000000..a81c28cd2 --- /dev/null +++ b/tools/repo_stats/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.11-slim + +EXPOSE $PORT + +WORKDIR /app + +COPY requirements.txt . +COPY leaderboard.py . +COPY models.py . + +RUN pip install --no-cache-dir -r requirements.txt + +# Command to run the Streamlit application +ENTRYPOINT ["sh", "-c", "streamlit run leaderboard.py --server.port=$PORT --server.address=0.0.0.0"] diff --git a/tools/repo_stats/Makefile b/tools/repo_stats/Makefile new file mode 100644 index 000000000..71551eb41 --- /dev/null +++ b/tools/repo_stats/Makefile @@ -0,0 +1,36 @@ +## This defines all targets as phony targets, i.e. targets that are always out of date +## This is done to ensure that the commands are always executed, even if a file with the same name exists +## See https://www.gnu.org/software/make/manual/html_node/Phony-Targets.html +## Remove this if you want to use this Makefile for real targets +.PHONY: * + +GCP_PROJECT_NAME = $(shell gcloud config get-value project) + +# construct service account key file name +# upload as github secret and use in github actions +service_account: + gcloud iam service-accounts create repo-stats-account \ + --description="Service account for repo stats application" --display-name="repo-stats-account" + gcloud projects add-iam-policy-binding $(GCP_PROJECT_NAME) \ + --member="serviceAccount:repo-stats-account@$(GCP_PROJECT_NAME).iam.gserviceaccount.com" \ + --role="roles/storage.objectUser" + --project + gcloud iam service-accounts keys create service_account_key.json \ + --iam-account=repo-stats-account@$(GCP_PROJECT_NAME).iam.gserviceaccount.com + echo service_account_key.json >> .gitignore + +# deploy the leaderboard service +deploy_leaderboard: + gcloud run deploy repo-stats-leaderboard \ + --source . 
\ + --platform managed \ + --region europe-west1 \ + --allow-unauthenticated + + gcloud run services add-iam-policy-binding \ + --region=europe-west1 \ + --member=allUsers \ + --role=roles/run.invoker \ + repo-stats-leaderboard + + gcloud run services describe repo-stats-leaderboard --region=europe-west1 --format="value(status.url)" diff --git a/tools/repo_stats/leaderboard.py b/tools/repo_stats/leaderboard.py new file mode 100644 index 000000000..f1313c595 --- /dev/null +++ b/tools/repo_stats/leaderboard.py @@ -0,0 +1,174 @@ +import base64 +import json +from io import BytesIO +from pathlib import Path + +import numpy as np +import pandas as pd +import streamlit as st +from dotenv import load_dotenv +from google.cloud.storage import Client +from models import RepoStats +from PIL import Image + +load_dotenv() + + +def download_data(file_name: str) -> None: + """Downloads the group-repository data from GCS.""" + storage_client = Client() + bucket = storage_client.bucket("mlops_group_repository") + blob = bucket.blob(file_name) + blob.download_to_filename(file_name) + + +def load_data(file_name: str) -> pd.DataFrame: + """Loads the group-repository data into a DataFrame.""" + with Path(file_name).open() as f: + content = json.load(f) + repo_content = [RepoStats(**group) for group in content] + repo_content_dicts = [repo.model_dump() for repo in repo_content] + return pd.DataFrame(repo_content_dicts) + + +def activity_to_image(activity_matrix: list[list[int]], scale_factor: int = 10) -> str: + """ + Convert an activity matrix (N, 24) into an RGB image scaled up by a given factor. + + Args: + activity_matrix (list[list[int]]): A 2D list of activity values. + scale_factor (int): Factor by which to scale up the image size. + + Returns: + str: Base64-encoded PNG image string in "data:image/png;base64," format. + """ + # Normalize the activity matrix to the range [0, 255]. + array = np.array(activity_matrix, dtype=np.float32) + max_value = np.max(array) + if max_value > 0: + array = array / max_value * 255 + + # Create an RGB image: Green for activity, Black for no activity. + height, width = array.shape + rgb_array = np.zeros((height, width, 3), dtype=np.uint8) + rgb_array[:, :, 1] = array.astype(np.uint8) # Green channel + + # Scale up the image by the scale factor. + scaled_height, scaled_width = height * scale_factor, width * scale_factor + image = Image.fromarray(rgb_array, mode="RGB") + image = image.resize((scaled_width, scaled_height), Image.NEAREST) + + # Convert the image to a Base64 string. + buffer = BytesIO() + image.save(buffer, format="PNG") + buffer.seek(0) + image_base64 = base64.b64encode(buffer.read()).decode("utf-8") + + return f"data:image/png;base64,{image_base64}" + + +def main() -> None: + """Main function for the leaderboard.""" + download_data("repo_stats.json") + dataframe = load_data("repo_stats.json") + dataframe["num_warnings"] = dataframe["num_warnings"].apply(lambda x: 27 - x if pd.notnull(x) else x) + dataframe["activity_matrix"] = dataframe["activity_matrix"].apply( + lambda x: activity_to_image(x) if x is not None else x + ) + st.set_page_config(layout="wide") + st.title("Group Github Stats") + st.text( + """ + Below is shown automatic scraped data for all groups in the course. None of these stats directly contribute + towards you passing the course or not. Instead they can inform how you are doing in comparison to other groups, + and it can indirectly inform the us about how well you are using version control for collaborating on your + project. 
+ """, + ) + + df_base = dataframe[ + [ + "group_number", + "group_size", + "num_contributors", + "total_commits", + "num_commits_to_main", + "contributions_per_contributor", + "num_prs", + "average_commit_length_to_main", + "average_commit_length", + "latest_commit", + "activity_matrix", + ] + ] + + df_content = dataframe[ + [ + "group_number", + "num_python_files", + "num_docker_files", + "num_workflow_files", + "has_requirements_file", + "has_cloudbuild", + "using_dvc", + "repo_size", + "actions_passing", + "readme_length", + "num_warnings", + ] + ] + + st.header("Base statistics") + st.dataframe( + df_base, + column_config={ + "group_number": "Group Number", + "group_size": "Group Size", + "num_contributors": "Contributors", + "total_commits": "Total Commits", + "num_commits_to_main": "Commits to main", + "contributions_per_contributor": st.column_config.BarChartColumn("Contributions distribution"), + "num_prs": "PRs", + "average_commit_length_to_main": "ACML* (main)", + "average_commit_length": "ACML* (all)", + "latest_commit": st.column_config.DatetimeColumn("Latest commit"), + "activity_matrix": st.column_config.ImageColumn( + "Commit activity**", + width="medium", + ), + }, + hide_index=True, + ) + st.write("*ACML = Average commit message length") + st.write( + "**Activity matrix is a (N, 24) matrix where N is the number of days since the first commit." + " Each row represents the number of commits per hour for that day." + ) + st.header("Content statistics") + st.dataframe( + df_content, + column_config={ + "group_number": "Group Number", + "num_python_files": "Python files", + "num_docker_files": "Docker files", + "num_workflow_files": "Workflow files", + "has_requirements_file": "Requirement file", + "has_cloudbuild": "Cloudbuild", + "using_dvc": "Using dvc", + "repo_size": "Repository size", + "actions_passing": "Actions passing", + "readme_length": "Readme size", + "num_warnings": st.column_config.ProgressColumn( + "Report completion", + help="Number of questions answered in exam report", + format="%d", + min_value=0, + max_value=27, + ), + }, + hide_index=True, + ) + + +if __name__ == "__main__": + main() diff --git a/tools/repo_stats/models.py b/tools/repo_stats/models.py new file mode 100644 index 000000000..2245b17a8 --- /dev/null +++ b/tools/repo_stats/models.py @@ -0,0 +1,282 @@ +import base64 +import os +from pathlib import Path +from subprocess import PIPE, Popen + +import markdown2 +import requests +from dotenv import load_dotenv +from pydantic import BaseModel + +load_dotenv() +GH_TOKEN = os.getenv("GH_TOKEN") or os.getenv("GITHUB_TOKEN") +headers = {"Authorization": f"Bearer {GH_TOKEN}"} + + +class RepoStats(BaseModel): + """Model for repository statistics.""" + + group_number: int + group_size: int + num_contributors: int | None + num_prs: int | None + num_commits_to_main: int | None + average_commit_length_to_main: float | None + latest_commit: str | None + average_commit_length: float | None + contributions_per_contributor: list[int] | None + total_commits: int | None + activity_matrix: list[list[int]] | None + + num_docker_files: int | None + num_python_files: int | None + num_workflow_files: int | None + has_requirements_file: bool | None + has_cloudbuild: bool | None + using_dvc: bool | None + repo_size: float | None + readme_length: int | None + actions_passing: bool | None + + num_warnings: int | None + + +class Contributor(BaseModel): + """Model for contributors.""" + + login: str + contributions: int + commits_pr: int + + @property + def 
total_commits(self) -> int: + """Returns the total number of commits by the contributor.""" + return self.contributions + self.commits_pr + + +class Report(BaseModel): + """Model for the report.""" + + group_number: int + repo_api: str + default_branch: str + file_written: bool = False + + def download_checker(self) -> None: + """Downloads the checker script from the repository.""" + if not Path("report.py").exists(): + url = "https://api.github.com/repos/SkafteNicki/dtu_mlops/contents/reports/report.py" + response = requests.get(url, headers=headers, timeout=100) + if response.status_code == 200: + content_base64 = response.json()["content"] + content_decoded = base64.b64decode(content_base64).decode("utf-8") + with open("report.py", "w", encoding="utf-8") as file: + file.write(content_decoded) + + def download_report(self) -> None: + """Downloads the report from the repository.""" + if self.file_written: + return + url = f"{self.repo_api}/contents/reports/README.md" + response = requests.get(url, headers=headers, timeout=100).json() + if response.get("message") != "Not Found": + content_base64 = response["content"] + content_decoded = base64.b64decode(content_base64).decode("utf-8") + with open("README.md", "w", encoding="utf-8") as file: + file.write(content_decoded) + self.file_written = True + else: + self.file_written = False + + @property + def check_answers(self) -> int | None: + """Returns the number of warnings in the report.""" + self.download_checker() + self.download_report() + if self.file_written: + p = Popen( + ["python", "report.py", "check"], + cwd=".", + stdout=PIPE, + stderr=PIPE, + stdin=PIPE, + ) + output = p.stderr.read() + return len(output.decode("utf-8").split("\n")[:-1:2]) + return None + + +class RepoContent(BaseModel): + """Model for repository content.""" + + group_number: int + repo_api: str + default_branch: str + + @property + def file_tree(self): + """Returns the file tree of the repository.""" + if hasattr(self, "_file_tree"): + return self._file_tree + branch_url = f"{self.repo_api}/git/refs/heads/{self.default_branch}" + branch_response = requests.get(branch_url, headers=headers, timeout=100).json() + tree_sha = branch_response["object"]["sha"] + tree_url = f"{self.repo_api}/git/trees/{tree_sha}?recursive=1" + tree_response = requests.get(tree_url, headers=headers, timeout=100).json() + self._file_tree = tree_response["tree"] + return self._file_tree + + @property + def num_docker_files(self) -> int: + """Returns the number of Dockerfiles in the repository.""" + return len([f for f in self.file_tree if "Dockerfile" in f["path"] or ".dockerfile" in f["path"]]) + + @property + def num_python_files(self) -> int: + """Returns the number of Python files in the repository.""" + return len([f for f in self.file_tree if ".py" in f["path"]]) + + @property + def num_workflow_files(self) -> int: + """Returns the number of workflow files in the repository.""" + return len([f for f in self.file_tree if ".yml" in f["path"]]) + + @property + def has_requirements_file(self) -> bool: + """Returns True if the repository has a requirements.txt file.""" + return any("requirements.txt" in f["path"] for f in self.file_tree) + + @property + def has_cloudbuild(self) -> bool: + """Returns True if the repository uses Google Cloud Build.""" + return any("cloudbuild.yaml" in f["path"] for f in self.file_tree) + + @property + def using_dvc(self) -> bool: + """Returns True if the repository uses DVC.""" + return any(".dvc" in f["path"] for f in self.file_tree) + + @property + def 
repo_size(self) -> float: + """Returns the size of the repository in MB.""" + total_size_bytes = sum([f["size"] for f in self.file_tree if "size" in f]) + return total_size_bytes / (1024**2) + + @property + def readme_length(self) -> int: + """Returns the number of words in the README file.""" + readme_url = f"{self.repo_api}/readme" + readme_response = requests.get(readme_url, headers=headers, timeout=100).json() + if "content" in readme_response: + content_base64 = readme_response["content"] + content_decoded = base64.b64decode(content_base64).decode("utf-8") + plain_text = markdown2.markdown(content_decoded, extras=["strip"]) + return len(plain_text.split()) + return 0 + + @property + def actions_passing(self) -> bool: + """Returns True if the GitHub Actions are passing.""" + commit_url = f"{self.repo_api}/commits/{self.default_branch}" + commit_response = requests.get(commit_url, headers=headers, timeout=100).json() + latest_commit = commit_response["sha"] + + workflow_url = f"{self.repo_api}/actions/runs?branch={self.default_branch}&event=push" + workflow_response = requests.get(workflow_url, headers=headers, timeout=100).json() + workflow_runs = workflow_response["workflow_runs"] + + all_passing = True + for w_run in workflow_runs: + if w_run["head_sha"] == latest_commit and ( + w_run["status"] != "completed" or w_run["conclusion"] != "success" + ): + all_passing = False + break + return all_passing + + +class GroupInfo(BaseModel): + """Model for group information.""" + + group_number: int + student_1: str | None + student_2: str | None + student_3: str | None + student_4: str | None + student_5: str | None + repo_url: str + + @property + def group_size(self) -> int: + """Returns the number of students in the group.""" + return len(list(filter(None, [self.student_1, self.student_2, self.student_3, self.student_4, self.student_5]))) + + @property + def repo_api(self) -> str: + """Returns the API URL of the repository.""" + split = self.repo_url.split("/") + return f"https://api.github.com/repos/{split[-2]}/{split[-1]}" + + @property + def default_branch(self) -> str: + """Returns the default branch of the repository.""" + if hasattr(self, "_default_branch"): + return self._default_branch + self._default_branch = requests.get(self.repo_api, headers=headers, timeout=100).json()["default_branch"] + return self._default_branch + + @property + def repo_accessible(self) -> bool: + """Returns True if the repository is accessible.""" + if hasattr(self, "_repo_accessible"): + return self._repo_accessible + self._repo_accessible = requests.head(self.repo_url, headers=headers, timeout=100).status_code == 200 + return self._repo_accessible + + @property + def contributors(self) -> list[Contributor]: + """Returns all contributors to the repository.""" + if self.repo_accessible: + request = requests.get(f"{self.repo_api}/contributors", headers=headers, timeout=100).json() + return [Contributor(login=c["login"], contributions=c["contributions"], commits_pr=0) for c in request] + return None + + @property + def prs(self): + """Returns all pull requests to the repository.""" + if self.repo_accessible: + prs = [] + page_counter = 1 + while True: + request = requests.get( + f"{self.repo_api}/pulls", + headers=headers, + timeout=100, + params={"state": "all", "page": page_counter, "per_page": 100}, + ).json() + if len(request) == 0: + break + page_counter += 1 + prs.extend(request) + return prs + return None + + @property + def commits(self) -> list: + """Returns all commits to the default branch.""" + 
if self.repo_accessible: + commits = [] + page_counter = 1 + while True: + request = requests.get( + f"{self.repo_api}/commits", + headers=headers, + timeout=100, + params={"page": page_counter, "per_page": 100}, + ).json() + if len(request) == 0: + break + page_counter += 1 + commits.extend(request) + return commits + return None diff --git a/tools/repo_stats/requirements.txt b/tools/repo_stats/requirements.txt new file mode 100644 index 000000000..45f322845 --- /dev/null +++ b/tools/repo_stats/requirements.txt @@ -0,0 +1,10 @@ +typer==0.13.1 +python-dotenv==1.0.1 +markdown2==2.5.1 +google-cloud-storage==2.18.2 +pandas==2.2.3 +streamlit==1.40.1 +pydantic==2.9.2 +requests==2.32.3 +numpy==2.1.3 +pillow==11.0.0 diff --git a/tools/repo_stats/scraper.py b/tools/repo_stats/scraper.py new file mode 100644 index 000000000..034e3a058 --- /dev/null +++ b/tools/repo_stats/scraper.py @@ -0,0 +1,220 @@ +import csv +import datetime +import json +import logging +import os +from pathlib import Path + +import numpy as np +import requests +from dotenv import load_dotenv +from google.cloud.storage import Client +from models import GroupInfo, RepoContent, Report, RepoStats +from typer import Typer + +load_dotenv() + +GH_TOKEN = os.getenv("GH_TOKEN") or os.getenv("GITHUB_TOKEN") +headers = {"Authorization": f"Bearer {GH_TOKEN}"} + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def upload_data(file_name: str) -> None: + """Uploads the repo stats data to GCS.""" + storage_client = Client() + bucket = storage_client.bucket("mlops_group_repository") + blob = bucket.blob(file_name) + blob.upload_from_filename(file_name) + + +def download_data(file_name: str) -> None: + """Downloads the group-repository data from GCS.""" + storage_client = Client() + bucket = storage_client.bucket("mlops_group_repository") + blob = bucket.blob(file_name) + blob.download_to_filename(file_name) + + +def load_data(file_name: str) -> list[GroupInfo]: + """Loads the group-repository data into a DataFrame.""" + with Path(file_name).open() as f: + csv_reader = csv.reader(f, delimiter=",") + content = [] + for i, row in enumerate(csv_reader): + if i == 0: # Skip the header + continue + group = GroupInfo( + group_number=int(row[0]), + student_1=row[1] if row[1] != "" else None, + student_2=row[2] if row[2] != "" else None, + student_3=row[3] if row[3] != "" else None, + student_4=row[4] if row[4] != "" else None, + student_5=row[5] if row[5] != "" else None, + repo_url=row[6], + ) + content.append(group) + return content + + +def create_activity_matrix(commits: list, max_delta: int = 5, normalize: bool = True) -> list[list[int]]: + """Creates an activity matrix from the commits.""" + commit_times = [datetime.datetime.fromisoformat(commit["commit"]["committer"]["date"][:-1]) for commit in commits] + commit_times.sort() + + start_time = commit_times[0] + end_time = min(start_time + datetime.timedelta(weeks=max_delta), commit_times[-1]) + + num_days = (end_time - start_time).days + 1 # include last day + + commit_matrix = np.zeros((num_days, 24), dtype=int) + + for commit_time in commit_times: + if start_time <= commit_time <= end_time: + day_index = (commit_time - start_time).days + hour_index = commit_time.hour + commit_matrix[day_index, hour_index] += 1 + + return commit_matrix.tolist() + + +app = Typer() + + +@app.command() +def main(): + """Main function to scrape the group-repository data.""" + logger.info("Getting group-repository information") + if "group_info.csv" not in os.listdir(): + 
download_data("group_info.csv") + group_data = load_data("group_info.csv") + logger.info("Group-repository information loaded successfully") + + repo_stats: list[RepoContent] = [] + for index, group in enumerate(group_data): + logger.info(f"Processing group {group.group_number}, {index+1}/{len(group_data)}") + + if group.repo_accessible: + contributors = group.contributors + num_contributors = len(contributors) + + prs = group.prs + num_prs = len(prs) + + commits = group.commits + num_commits_to_main = len(commits) + commit_messages = [c["commit"]["message"] for c in commits] + average_commit_length_to_main = sum([len(c) for c in commit_messages]) / len(commit_messages) + latest_commit = commits[0]["commit"]["author"]["date"] + + merged_prs = [p["number"] for p in prs if p["merged_at"] is not None] + for pr_num in merged_prs: + pr_commits = requests.get( + f"{group.repo_api}/pulls/{pr_num}/commits", headers=headers, timeout=100 + ).json() + commit_messages += [c["commit"]["message"] for c in pr_commits] + for commit in pr_commits: + for contributor in contributors: + if ( + commit["committer"] is not None + and "login" in commit["committer"] + and contributor.login == commit["author"]["login"] + ): + contributor.commits_pr += 1 + commits += pr_commits + + activity_matrix = create_activity_matrix(commits) + + average_commit_length = sum([len(c) for c in commit_messages]) / len(commit_messages) + + contributions_per_contributor = [c.total_commits for c in contributors] + total_commits = sum(contributions_per_contributor) + + repo_content = RepoContent( + group_number=group.group_number, repo_api=group.repo_api, default_branch=group.default_branch + ) + num_docker_files = repo_content.num_docker_files + num_python_files = repo_content.num_python_files + num_workflow_files = repo_content.num_workflow_files + has_requirements_file = repo_content.has_requirements_file + has_cloudbuild = repo_content.has_cloudbuild + using_dvc = repo_content.using_dvc + repo_size = repo_content.repo_size + readme_length = repo_content.readme_length + actions_passing = repo_content.actions_passing + + report = Report( + group_number=group.group_number, repo_api=group.repo_api, default_branch=group.default_branch + ) + num_warnings = report.check_answers + + else: + num_contributors = None + num_prs = None + num_commits_to_main = None + average_commit_length_to_main = None + latest_commit = None + average_commit_length = None + total_commits = None + contributions_per_contributor = None + total_commits = None + activity_matrix = None + + num_docker_files = None + num_python_files = None + num_workflow_files = None + has_requirements_file = None + has_cloudbuild = None + using_dvc = None + repo_size = None + readme_length = None + actions_passing = None + + num_warnings = None + + repo_stat = RepoStats( + group_number=group.group_number, + group_size=group.group_size, + num_contributors=num_contributors, + num_prs=num_prs, + num_commits_to_main=num_commits_to_main, + average_commit_length_to_main=average_commit_length_to_main, + latest_commit=latest_commit, + average_commit_length=average_commit_length, + contributions_per_contributor=contributions_per_contributor, + total_commits=total_commits, + activity_matrix=activity_matrix, + num_docker_files=num_docker_files, + num_python_files=num_python_files, + num_workflow_files=num_workflow_files, + has_requirements_file=has_requirements_file, + has_cloudbuild=has_cloudbuild, + using_dvc=using_dvc, + repo_size=repo_size, + readme_length=readme_length, + 
actions_passing=actions_passing,
+            num_warnings=num_warnings,
+        )
+        repo_stats.append(repo_stat)
+
+    logger.info("Writing repo stats to file")
+    now = datetime.datetime.now(tz=datetime.UTC).strftime("%Y_%m_%d_%H_%M_%S")
+    filename = f"repo_stats_{now}.json"
+    with open("repo_stats.json", "w") as f:
+        json.dump([r.model_dump() for r in repo_stats], f)
+    with open(filename, "w") as f:
+        json.dump([r.model_dump() for r in repo_stats], f)
+
+    logger.info("Uploading repo stats to GCS")
+    upload_data("repo_stats.json")
+    upload_data(filename)
+
+    logger.info("Cleaning up local temp files")
+    # The report files only exist if at least one repository was accessible,
+    # so tolerate their absence instead of raising FileNotFoundError.
+    Path("README.md").unlink(missing_ok=True)
+    Path("report.py").unlink(missing_ok=True)
+    Path(filename).unlink(missing_ok=True)
+
+
+if __name__ == "__main__":
+    app()
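
For local testing of the new tooling (outside the GitHub Actions workflow and Cloud Run), a minimal sketch of the equivalent commands is shown below. It assumes the service-account key produced by the `service_account` Make target sits in the working directory as `service_account_key.json` and that a GitHub token is exported as `GH_TOKEN`; the key location and the chosen Streamlit port are assumptions, while the requirements path, scraper entry point and leaderboard command mirror the workflow and Dockerfile above.

```bash
# Sketch: run the scraper and leaderboard locally (assumed credential locations).
export GOOGLE_APPLICATION_CREDENTIALS=service_account_key.json  # key from `make service_account` (assumed path)
export GH_TOKEN=<your-github-token>                             # same secret the workflow injects

# Install the dependencies the workflow installs
pip install -r tools/repo_stats/requirements.txt

# Scrape the group repositories and upload repo_stats.json to the GCS bucket
python tools/repo_stats/scraper.py

# Preview the leaderboard; Cloud Run provides $PORT, locally any free port works
cd tools/repo_stats && streamlit run leaderboard.py --server.port 8501
```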