diff --git a/.github/workflows/black_check.yml b/.github/workflows/black_check.yml
new file mode 100644
index 00000000..542da966
--- /dev/null
+++ b/.github/workflows/black_check.yml
@@ -0,0 +1,26 @@
+name: Black
+
+on:
+ push:
+ branches: [ main, release-1.0.0 ]
+ pull_request:
+ branches: [ main, release-1.0.0 ]
+
+jobs:
+
+ check-black:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: 3.11
+ - name: Install Black 24.10.0 (keep in sync with the version pinned in setup.py)
+ run: |
+ python -m pip install --upgrade pip
+ pip install black==24.10.0
+ - name: Run Black
+ run: |
+ black --check .
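For reference, the check above can be reproduced locally before pushing (a minimal sketch, assuming it is run from the repository root with the same pinned Black version):

```bash
pip install black==24.10.0
black --check .
```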
diff --git a/.github/workflows/py311_tests.yml b/.github/workflows/py311_tests.yml
new file mode 100644
index 00000000..7e186b22
--- /dev/null
+++ b/.github/workflows/py311_tests.yml
@@ -0,0 +1,26 @@
+name: Pytest
+
+on:
+ push:
+ branches: [ main, release-1.0.0 ]
+ pull_request:
+ branches: [ main, release-1.0.0 ]
+
+jobs:
+
+ build:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: 3.11
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install -e .[test]
+ - name: Run tests
+ run: |
+ pytest
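The test job can likewise be reproduced locally (a minimal sketch, assuming a Python 3.11 environment and a checkout of the repository root):

```bash
python -m pip install --upgrade pip
pip install -e .[test]
pytest
```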
diff --git a/.gitignore b/.gitignore
index 4e583a04..2b3546e8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,5 @@
__pycache__/
*.py[cod]
-data
*.egg-info
export/datasets
tmp
@@ -12,15 +11,15 @@ env/
.venv
.vscode
build
+dist
venv/
wiki
-factgenie/annotations
-factgenie/generations
-factgenie/outputs
+factgenie/campaigns
factgenie/templates/campaigns/*
-!factgenie/templates/campaigns/*.*
+factgenie/data/datasets.yml
+factgenie/data/inputs
+factgenie/data/outputs
factgenie/config/config.yml
-factgenie/config/datasets.yml
factgenie/config/llm-eval
factgenie/config/llm-gen
factgenie/config/crowdsourcing
\ No newline at end of file
diff --git a/CONTRIBUTING b/CONTRIBUTING
index 81983a51..55668978 100644
--- a/CONTRIBUTING
+++ b/CONTRIBUTING
@@ -2,4 +2,4 @@
Thank you for considering contributing to **factgenie**!
-Please, see the 🌱 [Contributing](../../wiki/07-Contributing) page on our wiki for details.
+Please see the 🌱 [Contributing](../../wiki/Contributing) page on our wiki for details.
diff --git a/Dockerfile b/Dockerfile
index f5fa9524..252c4d8c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,8 +4,9 @@ RUN mkdir -p /usr/src/factgenie
WORKDIR /usr/src/factgenie
COPY . /usr/src/factgenie
+RUN cp /usr/src/factgenie/factgenie/config/config_TEMPLATE.yml /usr/src/factgenie/factgenie/config/config.yml
RUN pip install -e .[deploy]
EXPOSE 80
-ENTRYPOINT ["gunicorn", "--env", "SCRIPT_NAME=", "-b", ":80", "-w", "1", "--threads", "2", "factgenie.cli:create_app()"]
+ENTRYPOINT ["gunicorn", "--env", "SCRIPT_NAME=", "-b", ":80", "-w", "1", "--threads", "8", "factgenie.bin.run:create_app()"]
\ No newline at end of file
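A minimal sketch of building and running this image outside docker-compose (the 8890:80 host mapping simply mirrors the compose file below; any free host port works):

```bash
docker build -t factgenie .
docker run -p 8890:80 factgenie
```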
diff --git a/README.md b/README.md
index d4f4ba2b..0c6998dc 100644
--- a/README.md
+++ b/README.md
@@ -3,12 +3,12 @@
factgenie
-![GitHub](https://img.shields.io/github/license/kasnerz/factgenie)
-![GitHub issues](https://img.shields.io/github/issues/kasnerz/factgenie)
-[![arXiv](https://img.shields.io/badge/arXiv-2407.17863-0175ac.svg)](https://arxiv.org/abs/2407.17863)
+![Github downloads](https://img.shields.io/github/downloads/kasnerz/factgenie/total)
+![PyPI](https://img.shields.io/pypi/v/factgenie)
+[![slack](https://img.shields.io/badge/slack-factgenie-0476ad.svg?logo=slack)](https://join.slack.com/t/factgenie/shared_invite/zt-2u180yy81-3zCR7mt8EOy55cxA5zhKyQ)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
![Github stars](https://img.shields.io/github/stars/kasnerz/factgenie?style=social)
-
+
Annotate LLM outputs with a lightweight, self-hosted web application 🌈
@@ -17,14 +17,8 @@ Annotate LLM outputs with a lightweight, self-hosted web application 🌈
-## 📢 News
-- **25/10/2024** — We are preparing the first official release. Stay tuned!
-- **08/10/2024** — We added [step-by-step walkthrougs](../../wiki/00-Tutorials) on using factgenie for generating and annotating outputs for a dataset of basketball reports 🏀
-- **07/10/2024** — We removed the example datasets from the repository. Instead, you can find them in the _External Resources_ section in the _Manage data_ interface.
-- **24/09/2024** — We introduced a brand new factgenie logo!
-- **19/09/2024** — On the Analytics page, you can now see detailed statistics about annotations and compute inter-annotator agreement 📈
-- **16/09/2024** — You can now collect extra inputs from the annotators for each example using sliders and select boxes.
-- **16/09/2024** — We added an option to generate outputs for the inputs with LLMs directly within factgenie! 🦾
+## 📢 Changelog
+- **[1.0.0] - 2024-11-13**: The first official release 🎉
## 👉️ How can factgenie help you?
Outputs from large language models (LLMs) may contain errors: semantic, factual, and lexical.
@@ -42,39 +36,53 @@ Factgenie can provide you:
*What factgenie does **not help with** is collecting the data (we assume you already have it), starting the crowdsourcing campaign (for that, you need to use a service such as [Prolific.com](https://prolific.com)), or running the LLM evaluators (for that, you need a local framework such as [Ollama](https://ollama.com) or a proprietary API).*
## 🏃 Quickstart
-Make sure you have Python 3 installed (the project is tested with Python 3.10).
+Make sure you have Python >=3.9 installed.
-After cloning the repository, the following commands install the package and start the web server:
+If you want to quickly try out factgenie, you can install the package from PyPI:
+```bash
+pip install factgenie
```
+
+However, the recommended approach is to install factgenie as an editable package:
+```bash
+git clone https://github.com/ufal/factgenie.git
+cd factgenie
pip install -e .[dev,deploy]
-factgenie run --host=127.0.0.1 --port 5000
```
+This approach allows you to manually modify configuration files, write your own dataset classes, and access generated files.
+
+After installing factgenie, use the following command to run the server on your local computer:
+```bash
+factgenie run --host=127.0.0.1 --port 8890
+```
+More information on how to set up factgenie can be found on the [GitHub wiki](../../wiki/Setup).
## 💡 Usage guide
See the following **wiki pages** that will guide you through various use cases of factgenie:
-| Topic | Description |
-| ---------------------------------------------------------------------- | -------------------------------------------------- |
-| 🔧 [Setup](../../wiki/01-Setup) | How to install factgenie. |
-| 🗂️ [Data Management](../../wiki/02-Data-Management) | How to manage datasets and model outputs. |
-| 🤖 [LLM Annotations](../../wiki/03-LLM-Annotations) | How to annotate outputs using LLMs. |
-| 👥 [Crowdsourcing Annotations](../../wiki/04-Crowdsourcing-Annotations) | How to annotate outputs using human crowdworkers. |
-| ✍️ [Generating Outputs](../../wiki/05-Generating-Outputs) | How to generate outputs using LLMs. |
-| 📊 [Analyzing Annotations](../../wiki/06-Analyzing-Annotations) | How to obtain statistics on collected annotations. |
-| 🌱 [Contributing](../../wiki/07-Contributing) | How to contribute to factgenie. |
+| Topic | Description |
+| ------------------------------------------------------------------- | -------------------------------------------------- |
+| 🔧 [Setup](../../wiki/Setup) | How to install factgenie. |
+| 🗂️ [Data Management](../../wiki/Data-Management) | How to manage datasets and model outputs. |
+| 🤖 [LLM Annotations](../../wiki/LLM-Annotations) | How to annotate outputs using LLMs. |
+| 👥 [Crowdsourcing Annotations](../../wiki/Crowdsourcing-Annotations) | How to annotate outputs using human crowdworkers. |
+| ✍️ [Generating Outputs](../../wiki/Generating-Outputs) | How to generate outputs using LLMs. |
+| 📊 [Analyzing Annotations](../../wiki/Analyzing-Annotations) | How to obtain statistics on collected annotations. |
+| 💻 [Command Line Interface](../../wiki/CLI) | How to use factgenie command line interface. |
+| 🌱 [Contributing](../../wiki/Contributing) | How to contribute to factgenie. |
## 🔥 Tutorials
We also provide step-by-step walkthroughs showing how to employ factgenie on [the dataset from the Shared Task in Evaluating Semantic Accuracy](https://github.com/ehudreiter/accuracySharedTask):
-| Tutorial | Description |
-| ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------ |
-| [🏀 #1: Importing a custom dataset](../../wiki/00-Tutorials#-tutorial-1-importing-a-custom-dataset) | Loading the basketball statistics and model-generated basketball reports into the web interface. |
-| [💬 #2: Generating outputs](../../wiki/00-Tutorials#-tutorial-2-generating-outputs) | Using Llama 3.1 with Ollama for generating basketball reports. |
-| [📊 #3: Customizing data visualization](../../wiki/00-Tutorials#-tutorial-3-customizing-data-visualization) | Manually creating a custom dataset class for better data visualization. |
-| [🤖 #4: Annotating outputs with an LLM](../../wiki/00-Tutorials#-tutorial-4-annotating-outputs-with-an-llm) | Using GPT-4o for annotating errors in the basketball reports. |
-| [👨💼 #5: Annotating outputs with human annotators](../../wiki/00-Tutorials#-tutorial-5-annotating-outputs-with-human-annotators) | Using human annotators for annotating errors in the basketball reports. |
+| Tutorial | Description |
+| --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ |
+| [🏀 #1: Importing a custom dataset](../../wiki/Tutorials#-tutorial-1-importing-a-custom-dataset) | Loading the basketball statistics and model-generated basketball reports into the web interface. |
+| [💬 #2: Generating outputs](../../wiki/Tutorials#-tutorial-2-generating-outputs) | Using Llama 3.1 with Ollama for generating basketball reports. |
+| [📊 #3: Customizing data visualization](../../wiki/Tutorials#-tutorial-3-customizing-data-visualization) | Manually creating a custom dataset class for better data visualization. |
+| [🤖 #4: Annotating outputs with an LLM](../../wiki/Tutorials#-tutorial-4-annotating-outputs-with-an-llm) | Using GPT-4o for annotating errors in the basketball reports. |
+| [👨💼 #5: Annotating outputs with human annotators](../../wiki/Tutorials#-tutorial-5-annotating-outputs-with-human-annotators) | Using human annotators for annotating errors in the basketball reports. |
## 💬 Cite us
diff --git a/docker-compose.yml b/docker-compose.yml
index 8cbb9635..dbe25558 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,8 +1,29 @@
+# You need to run `curl http://localhost:11434/api/pull -d '{"name": "llama3.1:8b"}'` once
+# after running `docker-compose up -d` from the repo root directory
+# in order to download the llama3.1:8b model, which is the default model
+# we use in the example configurations for factgenie.
services:
factgenie:
container_name: factgenie
image: factgenie
restart: on-failure
ports:
- - 8080:80
- build: ./factgenie
+ - 8890:80
+ build: ./
+
+ # Factgenie connects to LLM inference servers via either the OpenAI client or Ollama.
+ # This service demonstrates running Ollama on CPU.
+ # For GPU inference, run Ollama without Docker,
+ # or see https://hub.docker.com/r/ollama/ollama and follow the GPU instructions.
+ ollama:
+ container_name: ollama
+ image: ollama/ollama
+ restart: on-failure
+ # We expose the port to your machine because you need to pull models for Ollama
+ # before factgenie queries the Ollama server to run inference with the model.
+ # E.g. `curl http://localhost:11434/api/pull -d '{"name": "llama3.1:8b"}'` downloads the default factgenie LLM.
+ ports:
+ - 11434:11434
+
+
+
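Putting the comments above together, a typical first start could look like this (a sketch; llama3.1:8b is only the default model used in the example configurations):

```bash
docker-compose up -d
curl http://localhost:11434/api/pull -d '{"name": "llama3.1:8b"}'
```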
diff --git a/factgenie/__init__.py b/factgenie/__init__.py
index e2ac06b8..606511ae 100644
--- a/factgenie/__init__.py
+++ b/factgenie/__init__.py
@@ -2,29 +2,21 @@
PACKAGE_DIR = Path(__file__).parent
ROOT_DIR = PACKAGE_DIR.parent
+
TEMPLATES_DIR = PACKAGE_DIR / "templates"
STATIC_DIR = PACKAGE_DIR / "static"
-ANNOTATIONS_DIR = PACKAGE_DIR / "annotations"
-GENERATIONS_DIR = PACKAGE_DIR / "generations"
+CAMPAIGN_DIR = PACKAGE_DIR / "campaigns"
LLM_EVAL_CONFIG_DIR = PACKAGE_DIR / "config" / "llm-eval"
LLM_GEN_CONFIG_DIR = PACKAGE_DIR / "config" / "llm-gen"
CROWDSOURCING_CONFIG_DIR = PACKAGE_DIR / "config" / "crowdsourcing"
-DATA_DIR = PACKAGE_DIR / "data"
-OUTPUT_DIR = PACKAGE_DIR / "outputs"
+INPUT_DIR = PACKAGE_DIR / "data" / "inputs"
+OUTPUT_DIR = PACKAGE_DIR / "data" / "outputs"
+DATASET_CONFIG_PATH = PACKAGE_DIR / "data" / "datasets.yml"
RESOURCES_CONFIG_PATH = PACKAGE_DIR / "config" / "resources.yml"
-DATASET_CONFIG_PATH = PACKAGE_DIR / "config" / "datasets.yml"
-
-OLD_DATASET_CONFIG_PATH = PACKAGE_DIR / "loaders" / "datasets.yml"
-OLD_MAIN_CONFIG_PATH = PACKAGE_DIR / "config.yml"
MAIN_CONFIG_PATH = PACKAGE_DIR / "config" / "config.yml"
-if not MAIN_CONFIG_PATH.exists() and not OLD_MAIN_CONFIG_PATH.exists():
- raise ValueError(
- f"Invalid path to config.yml {MAIN_CONFIG_PATH=}. "
- "Please copy config_TEMPLATE.yml to config.yml "
- "and change the password, update the host prefix, etc."
- )
-
+MAIN_CONFIG_TEMPLATE_PATH = PACKAGE_DIR / "config" / "config_TEMPLATE.yml"
+DEFAULT_PROMPTS_CONFIG_PATH = PACKAGE_DIR / "config" / "default_prompts.yml"
PREVIEW_STUDY_ID = "factgenie_preview"
diff --git a/factgenie/analysis.py b/factgenie/analysis.py
index c483061a..5ca7f5dd 100644
--- a/factgenie/analysis.py
+++ b/factgenie/analysis.py
@@ -11,11 +11,12 @@
from scipy.stats import pearsonr
import sys
from pathlib import Path
+from slugify import slugify
import logging
import coloredlogs
-import factgenie.utils as utils
+import factgenie.workflows as workflows
-from factgenie.campaigns import ANNOTATIONS_DIR
+from factgenie import CAMPAIGN_DIR
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
@@ -23,42 +24,11 @@
# coloredlogs.install(level="INFO", logger=logger, fmt="%(asctime)s %(levelname)s %(message)s")
-def get_example_info(j, campaign_id):
- return {
- "annotator_id": j["annotator_id"],
- "annotator_group": j.get("annotator_group", 0),
- "campaign_id": campaign_id,
- "dataset": j["dataset"],
- "example_idx": j["example_idx"],
- "setup_id": j["setup_id"],
- "split": j["split"],
- "flags": j.get("flags", []),
- "options": j.get("options", []),
- "text_fields": j.get("text_fields", []),
- }
-
-
-def load_annotations(line, campaign_id):
- j = json.loads(line)
- annotation_records = []
-
- r = get_example_info(j, campaign_id)
-
- for annotation in j["annotations"]:
- r["annotation_type"] = int(annotation["type"])
- r["annotation_start"] = annotation["start"]
- r["annotation_text"] = annotation["text"]
-
- annotation_records.append(r.copy())
-
- return annotation_records
-
-
-def create_example_record(line, campaign_id, annotation_span_categories, annotation_records):
+def create_example_record(line, metadata, annotation_span_categories, annotation_records):
# a record is created even if there are no annotations
j = json.loads(line)
- example_record = get_example_info(j, campaign_id)
+ example_record = workflows.create_annotation_example_record(j)
for i, category in enumerate(annotation_span_categories):
example_record["cat_" + str(i)] = 0
@@ -83,21 +53,20 @@ def load_annotations_for_campaign(campaign):
annotation_index = []
example_index = []
- campaign_id = campaign.metadata["id"]
annotation_span_categories = campaign.metadata["config"]["annotation_span_categories"]
- jsonl_files = glob.glob(os.path.join(ANNOTATIONS_DIR, campaign_id, "files", "*.jsonl"))
+ jsonl_files = glob.glob(os.path.join(CAMPAIGN_DIR, campaign.metadata["id"], "files", "*.jsonl"))
for jsonl_file in jsonl_files:
with open(jsonl_file) as f:
lines = f.readlines()
for line in lines:
try:
- annotation_records = load_annotations(line, campaign_id)
+ annotation_records = workflows.load_annotations_from_record(line, split_spans=True)
annotation_index += annotation_records
example_record = create_example_record(
- line, campaign_id, annotation_span_categories, annotation_records
+ line, campaign.metadata, annotation_span_categories, annotation_records
)
example_index.append(example_record)
except Exception as e:
@@ -172,6 +141,7 @@ def compute_avg_ann_counts(ann_counts, example_index):
dataset = row["dataset"]
split = row["split"]
setup_id = row["setup_id"]
+
ann_counts.loc[i, "example_count"] = (
example_index[
(example_index["dataset"] == dataset)
@@ -205,7 +175,10 @@ def compute_prevalence(ann_counts, example_index):
& (example_index["cat_" + str(annotation_type)] > 0)
]
- ann_counts.loc[i, "prevalence"] = examples.shape[0] / row["example_count"]
+ if row["example_count"] == 0:
+ ann_counts.loc[i, "prevalence"] = 0
+ else:
+ ann_counts.loc[i, "prevalence"] = examples.shape[0] / row["example_count"]
# round to three decimal places
ann_counts["prevalence"] = ann_counts["prevalence"].round(3)
@@ -276,7 +249,7 @@ def compute_extra_fields_stats(example_index):
return extra_fields_stats
-def compute_statistics(app, campaign, datasets):
+def compute_statistics(app, campaign):
statistics = {}
annotation_index, example_index = load_annotations_for_campaign(campaign)
@@ -308,6 +281,11 @@ def compute_pearson_macro_average(counts, first_ann_idx, second_ann_idx):
for c, cat_counts in counts.items():
r, _ = pearsonr(cat_counts[first_ann_idx], cat_counts[second_ann_idx])
+
+ # if r is nan, set it to 0
+ if not r == r:
+ r = 0
+
coefficients.append(r)
return round(sum(coefficients) / len(coefficients), 2), [round(coeff, 2) for coeff in coefficients]
@@ -323,7 +301,9 @@ def compute_pearson_micro_average(counts, first_ann_idx, second_ann_idx):
return round(r, 2)
-def compute_pearson_correlation(dataset_level_counts, example_level_counts, annotator_count, annotator_group_ids):
+def compute_pearson_correlation(
+ dataset_level_counts, example_level_counts, annotator_count, annotator_group_ids, compute_dataset_level_corr
+):
results = []
for a in range(annotator_count):
@@ -331,14 +311,22 @@ def compute_pearson_correlation(dataset_level_counts, example_level_counts, anno
a_group_id = annotator_group_ids[a]
b_group_id = annotator_group_ids[b]
- r_data_macro, r_data_list = compute_pearson_macro_average(
- dataset_level_counts, first_ann_idx=a, second_ann_idx=b
- )
+ if compute_dataset_level_corr:
+ r_data_macro, r_data_list = compute_pearson_macro_average(
+ dataset_level_counts, first_ann_idx=a, second_ann_idx=b
+ )
+ else:
+ r_data_macro, r_data_list = None, None
+
r_example_macro, r_example_list = compute_pearson_macro_average(
example_level_counts, first_ann_idx=a, second_ann_idx=b
)
- r_data_micro = compute_pearson_micro_average(dataset_level_counts, first_ann_idx=a, second_ann_idx=b)
+ if compute_dataset_level_corr:
+ r_data_micro = compute_pearson_micro_average(dataset_level_counts, first_ann_idx=a, second_ann_idx=b)
+ else:
+ r_data_micro = None
+
r_example_micro = compute_pearson_micro_average(example_level_counts, first_ann_idx=a, second_ann_idx=b)
results.append(
@@ -432,7 +420,7 @@ def prepare_example_index(combinations, selected_campaigns, campaigns):
return example_index, annotator_count, annotator_group_ids, cat_columns
-def compute_inter_annotator_agreement(app, selected_campaigns, combinations, campaigns, datasets):
+def compute_inter_annotator_agreement(app, selected_campaigns, combinations, campaigns):
combinations = [(c["dataset"], c["split"], c["setup_id"]) for c in combinations]
example_index, annotator_count, annotator_group_ids, cat_columns = prepare_example_index(
@@ -442,12 +430,13 @@ def compute_inter_annotator_agreement(app, selected_campaigns, combinations, cam
dataset_level_counts, example_level_counts = compute_span_counts(
example_index=example_index, annotator_count=annotator_count, combinations=combinations, cat_columns=cat_columns
)
-
+ compute_dataset_level_corr = len(combinations) > 1
results = compute_pearson_correlation(
dataset_level_counts=dataset_level_counts,
example_level_counts=example_level_counts,
annotator_count=annotator_count,
annotator_group_ids=annotator_group_ids,
+ compute_dataset_level_corr=compute_dataset_level_corr,
)
return results
diff --git a/factgenie/main.py b/factgenie/app.py
old mode 100755
new mode 100644
similarity index 54%
rename from factgenie/main.py
rename to factgenie/app.py
index fdee9fc9..f324ef3f
--- a/factgenie/main.py
+++ b/factgenie/app.py
@@ -1,15 +1,14 @@
#!/usr/bin/env python3
import os
import json
-import time
import logging
-import pandas as pd
import time
import threading
import traceback
import shutil
import datetime
-import markdown
+import urllib.parse
+
from flask import (
Flask,
render_template,
@@ -18,32 +17,28 @@
Response,
make_response,
redirect,
- url_for,
send_from_directory,
)
-from collections import defaultdict
-import urllib.parse
from slugify import slugify
-from factgenie import PREVIEW_STUDY_ID
-from factgenie.campaigns import HumanCampaign, CampaignStatus, ExampleStatus, ANNOTATIONS_DIR, GENERATIONS_DIR
-from factgenie.models import ModelFactory
-from factgenie.loaders.dataset import get_dataset_classes
-import factgenie.utils as utils
+import factgenie.crowdsourcing as crowdsourcing
+import factgenie.llm_campaign as llm_campaign
+import factgenie.workflows as workflows
import factgenie.analysis as analysis
+import factgenie.utils as utils
-from werkzeug.middleware.proxy_fix import ProxyFix
-
-DIR_PATH = os.path.dirname(__file__)
-TEMPLATES_DIR = os.path.join(DIR_PATH, "templates")
-STATIC_DIR = os.path.join(DIR_PATH, "static")
+from factgenie import CAMPAIGN_DIR, TEMPLATES_DIR, STATIC_DIR, INPUT_DIR
+from factgenie.campaign import CampaignMode, CampaignStatus, ExampleStatus
+from factgenie.models import ModelFactory
+from werkzeug.middleware.proxy_fix import ProxyFix
app = Flask("factgenie", template_folder=TEMPLATES_DIR, static_folder=STATIC_DIR)
app.db = {}
-app.db["annotation_index"] = {}
+app.db["annotation_index"] = None
+app.db["output_index"] = None
app.db["lock"] = threading.Lock()
-app.db["threads"] = {}
+app.db["running_campaigns"] = set()
app.db["announcers"] = {}
app.wsgi_app = ProxyFix(app.wsgi_app, x_host=1)
@@ -98,8 +93,6 @@ def prettify_json(value):
# -----------------
# Decorators
# -----------------
-
-
# Very simple decorator to protect routes
def login_required(f):
def wrapper(*args, **kwargs):
@@ -124,20 +117,8 @@ def wrapper(*args, **kwargs):
@login_required
def index():
logger.info(f"Main page loaded")
-
- return render_template(
- "index.html",
- host_prefix=app.config["host_prefix"],
- )
-
-
-@app.route("/about", methods=["GET", "POST"])
-@login_required
-def about():
- logger.info(f"About page loaded")
-
return render_template(
- "about.html",
+ "pages/index.html",
host_prefix=app.config["host_prefix"],
)
@@ -145,12 +126,12 @@ def about():
@app.route("/analyze", methods=["GET", "POST"])
@login_required
def analyze():
- logger.info(f"Analysis page loaded")
-
- campaigns = utils.get_sorted_campaign_list(app, sources=["crowdsourcing", "llm_eval"])
+ campaigns = workflows.get_sorted_campaign_list(
+ app, modes=[CampaignMode.CROWDSOURCING, CampaignMode.LLM_EVAL, CampaignMode.EXTERNAL]
+ )
return render_template(
- "analyze.html",
+ "pages/analyze.html",
campaigns=campaigns,
host_prefix=app.config["host_prefix"],
)
@@ -159,41 +140,37 @@ def analyze():
@app.route("/analyze/detail/", methods=["GET", "POST"])
@login_required
def analyze_detail(campaign_id):
- source = request.args.get("source")
-
- campaign = utils.load_campaign(app, campaign_id=campaign_id, mode=source)
- datasets = utils.get_local_dataset_overview(app)
+ campaign = workflows.load_campaign(app, campaign_id=campaign_id)
- statistics = analysis.compute_statistics(app, campaign, datasets)
+ statistics = analysis.compute_statistics(app, campaign)
return render_template(
- "analyze_detail.html",
+ "pages/analyze_detail.html",
statistics=statistics,
campaign=campaign,
- source=source,
host_prefix=app.config["host_prefix"],
)
@app.route("/annotate/", methods=["GET", "POST"])
def annotate(campaign_id):
- logger.info(f"Annotate page loaded")
+ workflows.refresh_indexes(app)
- campaign = utils.load_campaign(app, campaign_id=campaign_id, mode="crowdsourcing")
+ # only for preview purposes, batch index is otherwise randomly generated
+ batch_idx = request.args.get("batch_idx", None)
+ campaign = workflows.load_campaign(app, campaign_id=campaign_id)
service = campaign.metadata["config"]["service"]
- service_ids = utils.get_service_ids(service, request.args)
+ service_ids = crowdsourcing.get_service_ids(service, request.args)
- db = campaign.db
metadata = campaign.metadata
- annotation_set = utils.get_annotator_batch(app, campaign, db, service_ids)
+ annotation_set = crowdsourcing.get_annotator_batch(app, campaign, service_ids, batch_idx=batch_idx)
if not annotation_set:
# no more available examples
return render_template(
- "campaigns/closed.html",
+ "crowdsourcing/closed.html",
host_prefix=app.config["host_prefix"],
- metadata=metadata,
)
return render_template(
@@ -205,14 +182,23 @@ def annotate(campaign_id):
)
+@app.route("/app_config", methods=["GET"])
+@login_required
+def app_config():
+ return render_template(
+ "pages/app_config.html",
+ app_config=app.config,
+ host_prefix=app.config["host_prefix"],
+ )
+
+
@app.route("/browse", methods=["GET", "POST"])
@login_required
def browse():
- utils.generate_annotation_index(app)
-
dataset_id = request.args.get("dataset")
split = request.args.get("split")
example_idx = request.args.get("example_idx")
+ setup_id = request.args.get("setup_id")
if dataset_id and split and example_idx:
display_example = {"dataset": dataset_id, "split": split, "example_idx": int(example_idx)}
@@ -220,32 +206,31 @@ def browse():
else:
display_example = None
- datasets = utils.get_local_dataset_overview(app)
+ workflows.refresh_indexes(app)
+ datasets = workflows.get_local_dataset_overview(app)
datasets = {k: v for k, v in datasets.items() if v["enabled"]}
if not datasets:
return render_template(
- "no_datasets.html",
+ "pages/no_datasets.html",
host_prefix=app.config["host_prefix"],
)
-
return render_template(
- "browse.html",
+ "pages/browse.html",
display_example=display_example,
+ highlight_setup_id=setup_id,
datasets=datasets,
host_prefix=app.config["host_prefix"],
- annotations=app.db["annotation_index"],
)
-@app.route("/clear_campaign", methods=["GET", "POST"])
+@app.route("/clear_campaign", methods=["POST"])
@login_required
def clear_campaign():
data = request.get_json()
campaign_id = data.get("campaignId")
- mode = data.get("mode")
- campaign = utils.load_campaign(app, campaign_id=campaign_id, mode=mode)
+ campaign = workflows.load_campaign(app, campaign_id=campaign_id)
campaign.clear_all_outputs()
return utils.success()
@@ -256,35 +241,24 @@ def clear_campaign():
def clear_output():
data = request.get_json()
campaign_id = data.get("campaignId")
- mode = data.get("mode")
idx = int(data.get("idx"))
+ annotator_group = int(data.get("annotatorGroup"))
- campaign = utils.load_campaign(app, campaign_id=campaign_id, mode=mode)
- campaign.clear_output(idx)
+ campaign = workflows.load_campaign(app, campaign_id=campaign_id)
+ campaign.clear_output(idx, annotator_group)
return utils.success()
@app.route("/crowdsourcing", methods=["GET", "POST"])
@login_required
-def crowdsourcing():
- logger.info(f"Crowdsourcing page loaded")
-
- campaign_index = utils.generate_campaign_index(app, force_reload=True)
-
- llm_configs = utils.load_configs(mode="llm_eval")
- crowdsourcing_configs = utils.load_configs(mode="crowdsourcing")
-
- campaigns = defaultdict(dict)
-
- for campaign_id, campaign in sorted(
- campaign_index["crowdsourcing"].items(), key=lambda x: x[1].metadata["created"], reverse=True
- ):
- campaigns[campaign_id]["metadata"] = campaign.metadata
- campaigns[campaign_id]["stats"] = campaign.get_stats()
+def crowdsourcing_page():
+ llm_configs = workflows.load_configs(mode=CampaignMode.LLM_EVAL)
+ crowdsourcing_configs = workflows.load_configs(mode=CampaignMode.CROWDSOURCING)
+ campaigns = workflows.get_sorted_campaign_list(app, modes=[CampaignMode.CROWDSOURCING])
return render_template(
- "crowdsourcing.html",
+ "pages/crowdsourcing.html",
campaigns=campaigns,
llm_configs=llm_configs,
crowdsourcing_configs=crowdsourcing_configs,
@@ -296,14 +270,13 @@ def crowdsourcing():
@app.route("/crowdsourcing/detail/", methods=["GET", "POST"])
@login_required
def crowdsourcing_detail(campaign_id):
- campaign = utils.load_campaign(app, campaign_id=campaign_id, mode="crowdsourcing")
-
+ campaign = workflows.load_campaign(app, campaign_id=campaign_id)
overview = campaign.get_overview()
stats = campaign.get_stats()
return render_template(
- "crowdsourcing_detail.html",
- mode="crowdsourcing",
+ "pages/crowdsourcing_detail.html",
+ mode=CampaignMode.CROWDSOURCING,
campaign_id=campaign_id,
overview=overview,
stats=stats,
@@ -321,34 +294,14 @@ def crowdsourcing_create():
campaign_data = data.get("campaignData")
config = data.get("config")
- config = utils.parse_crowdsourcing_config(config)
-
- # create a new directory
- if os.path.exists(os.path.join(ANNOTATIONS_DIR, campaign_id)):
- return jsonify({"error": "Campaign already exists"})
-
- os.makedirs(os.path.join(ANNOTATIONS_DIR, campaign_id, "files"), exist_ok=True)
-
- # create the annotation CSV
- db = utils.generate_campaign_db(app, campaign_data, config=config)
- db.to_csv(os.path.join(ANNOTATIONS_DIR, campaign_id, "db.csv"), index=False)
-
- # save metadata
- with open(os.path.join(ANNOTATIONS_DIR, campaign_id, "metadata.json"), "w") as f:
- json.dump(
- {
- "id": campaign_id,
- "source": "crowdsourcing",
- "config": config,
- "created": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
- },
- f,
- indent=4,
- )
+ config = crowdsourcing.parse_crowdsourcing_config(config)
- # prepare the crowdsourcing HTML page
- utils.create_crowdsourcing_page(campaign_id, config)
- utils.load_campaign(app, campaign_id=campaign_id, mode="crowdsourcing")
+ try:
+ crowdsourcing.create_crowdsourcing_campaign(app, campaign_id, config, campaign_data)
+ workflows.load_campaign(app, campaign_id=campaign_id)
+ except Exception as e:
+ traceback.print_exc()
+ return utils.error(f"Error while creating campaign: {e}")
return utils.success()
@@ -356,21 +309,21 @@ def crowdsourcing_create():
@app.route("/crowdsourcing/new", methods=["GET", "POST"])
@login_required
def crowdsourcing_new():
- datasets = utils.get_local_dataset_overview(app)
+ datasets = workflows.get_local_dataset_overview(app)
datasets = {k: v for k, v in datasets.items() if v["enabled"]}
- model_outs = utils.get_model_outputs_overview(app, datasets, non_empty=True)
-
- configs = utils.load_configs(mode="crowdsourcing")
+ available_data = workflows.get_model_outputs_overview(app, datasets)
+ configs = workflows.load_configs(mode=CampaignMode.CROWDSOURCING)
+ default_prompts = utils.load_default_prompts()
- campaign_index = utils.generate_campaign_index(app, force_reload=False)
- default_campaign_id = utils.generate_default_id(campaign_index=campaign_index["crowdsourcing"], prefix="campaign")
+ default_campaign_id = workflows.generate_default_id(app=app, mode=CampaignMode.CROWDSOURCING, prefix="campaign")
return render_template(
- "crowdsourcing_new.html",
+ "pages/crowdsourcing_new.html",
default_campaign_id=default_campaign_id,
+ default_prompts=default_prompts,
datasets=datasets,
- model_outs=model_outs,
+ available_data=available_data,
configs=configs,
host_prefix=app.config["host_prefix"],
)
@@ -383,42 +336,32 @@ def compute_agreement():
combinations = data.get("combinations")
selected_campaigns = data.get("selectedCampaigns")
- campaign_index = utils.generate_campaign_index(app, force_reload=True)
- # flatten the campaigns
- campaigns = {k: v for source in campaign_index.values() for k, v in source.items()}
-
- datasets = utils.get_local_dataset_overview(app)
+ campaign_index = workflows.generate_campaign_index(app, force_reload=True)
try:
results = analysis.compute_inter_annotator_agreement(
app,
selected_campaigns=selected_campaigns,
combinations=combinations,
- campaigns=campaigns,
- datasets=datasets,
+ campaigns=campaign_index,
)
return jsonify(results)
except Exception as e:
traceback.print_exc()
- return jsonify({"error": f"Error while computing agreement: {e}"})
+ return utils.error(f"Error while computing agreement: {e}")
@app.route("/delete_campaign", methods=["POST"])
@login_required
def delete_campaign():
data = request.get_json()
- campaign_name = data.get("campaignId")
- mode = data.get("mode")
-
- if mode == "llm_gen":
- target_dir = GENERATIONS_DIR
- else:
- target_dir = ANNOTATIONS_DIR
+ campaign_id = data.get("campaignId")
- shutil.rmtree(os.path.join(target_dir, campaign_name))
+ shutil.rmtree(os.path.join(CAMPAIGN_DIR, campaign_id))
+ symlink_dir = os.path.join(TEMPLATES_DIR, "campaigns", campaign_id)
- if os.path.exists(os.path.join(TEMPLATES_DIR, "campaigns", campaign_name)):
- shutil.rmtree(os.path.join(TEMPLATES_DIR, "campaigns", campaign_name))
+ if os.path.exists(symlink_dir):
+ shutil.rmtree(symlink_dir)
return utils.success()
@@ -428,8 +371,7 @@ def delete_campaign():
def delete_dataset():
data = request.get_json()
dataset_id = data.get("datasetId")
-
- utils.delete_dataset(app, dataset_id)
+ workflows.delete_dataset(app, dataset_id)
return utils.success()
@@ -445,7 +387,7 @@ def delete_model_outputs():
setup_id = data.get("setup_id")
dataset = app.db["datasets_obj"][dataset_id]
- utils.delete_model_outputs(dataset, split, setup_id)
+ workflows.delete_model_outputs(dataset, split, setup_id)
return utils.success()
@@ -457,10 +399,10 @@ def download_dataset():
dataset_id = data.get("datasetId")
try:
- utils.download_dataset(app, dataset_id)
+ workflows.download_dataset(app, dataset_id)
except Exception as e:
traceback.print_exc()
- return jsonify({"error": f"Error while downloading dataset: {e}"})
+ return jsonify({"error": f"Error while downloading dataset: {e.__class__.__name__}: {e}"})
return utils.success()
@@ -473,14 +415,14 @@ def duplicate_config():
mode_to = data.get("modeTo")
campaign_id = data.get("campaignId")
- campaign_index = utils.generate_campaign_index(app, force_reload=False)
+ campaign_index = workflows.generate_campaign_index(app, force_reload=False)
if mode_from == mode_to:
- campaign = campaign_index[mode_from][campaign_id]
+ campaign = campaign_index[campaign_id]
config = campaign.metadata["config"]
else:
# currently we only support copying the annotation_span_categories between modes
- campaign = campaign_index[mode_from][campaign_id]
+ campaign = campaign_index[campaign_id]
llm_config = campaign.metadata["config"]
config = {"annotation_span_categories": llm_config["annotation_span_categories"]}
@@ -496,7 +438,7 @@ def duplicate_eval():
campaign_id = data.get("campaignId")
new_campaign_id = slugify(data.get("newCampaignId"))
- ret = utils.duplicate_eval(app, mode, campaign_id, new_campaign_id)
+ ret = llm_campaign.duplicate_llm_campaign(app, mode, campaign_id, new_campaign_id)
return ret
@@ -505,48 +447,50 @@ def duplicate_eval():
def render_example():
dataset_id = request.args.get("dataset")
split = request.args.get("split")
- example_idx = int(request.args.get("example_idx"))
+ example_idx = max(int(request.args.get("example_idx")), 0)
+ setup_id = request.args.get("setup_id", None)
try:
- example_data = utils.get_example_data(app, dataset_id, split, example_idx)
+ example_data = workflows.get_example_data(app, dataset_id, split, example_idx, setup_id)
+
return jsonify(example_data)
except Exception as e:
traceback.print_exc()
logger.error(f"Error while getting example data: {e}")
logger.error(f"{dataset_id=}, {split=}, {example_idx=}")
- return jsonify({"error": f"Error\n\t{e}\nwhile getting example data: {dataset_id=}, {split=}, {example_idx=}"})
+ return utils.error(
+ f"Error\n\t{e.__class__.__name__}: {e}\nwhile getting example data: {dataset_id=}, {split=}, {example_idx=}"
+ )
@app.route("/export_campaign_outputs/", methods=["GET", "POST"])
@login_required
def export_campaign_outputs(campaign_id):
- mode = request.args.get("mode")
+ return workflows.export_campaign_outputs(campaign_id)
- return utils.export_campaign_outputs(app, mode, campaign_id)
-
-@app.route("/export_dataset", methods=["GET", "POST"])
+@app.route("/export_dataset", methods=["POST", "GET"])
@login_required
def export_dataset():
dataset_id = request.args.get("dataset_id")
- return utils.export_dataset(app, dataset_id)
+ return workflows.export_dataset(app, dataset_id)
-@app.route("/export_outputs", methods=["GET", "POST"])
+@app.route("/export_outputs", methods=["POST", "GET"])
@login_required
def export_outputs():
dataset_id = request.args.get("dataset")
split = request.args.get("split")
setup_id = request.args.get("setup_id")
- return utils.export_outputs(app, dataset_id, split, setup_id)
+ return workflows.export_outputs(app, dataset_id, split, setup_id)
-@app.route("/files/", methods=["GET"])
+@app.route("/files/", methods=["GET", "POST"])
def download_file(filename):
# serving external files for datasets
- return send_from_directory("data", filename)
+ return send_from_directory(INPUT_DIR, filename)
@app.route("/login", methods=["GET", "POST"])
@@ -561,32 +505,22 @@ def login():
return resp
else:
return "Login failed", 401
- return render_template("login.html", host_prefix=app.config["host_prefix"])
+ return render_template("pages/login.html", host_prefix=app.config["host_prefix"])
-@app.route("/llm_campaign", methods=["GET", "POST"])
+@app.route("/llm_eval", methods=["GET", "POST"])
+@app.route("/llm_gen", methods=["GET", "POST"])
@login_required
-def llm_campaign():
- logger.info(f"LLM campaign page loaded")
- mode = request.args.get("mode")
-
- if not mode:
- return "The `mode` argument was not specified", 404
-
- campaign_index = utils.generate_campaign_index(app)
- campaigns = defaultdict(dict)
+def llm_campaign_page():
+ mode = utils.get_mode_from_path(request.path)
- llm_configs = utils.load_configs(mode=mode)
- crowdsourcing_configs = utils.load_configs(mode="crowdsourcing")
+ campaigns = workflows.get_sorted_campaign_list(app, modes=[mode])
- for campaign_id, campaign in sorted(
- campaign_index[mode].items(), key=lambda x: x[1].metadata["created"], reverse=True
- ):
- campaigns[campaign_id]["metadata"] = campaign.metadata
- campaigns[campaign_id]["stats"] = campaign.get_stats()
+ llm_configs = workflows.load_configs(mode=mode)
+ crowdsourcing_configs = workflows.load_configs(mode=CampaignMode.CROWDSOURCING)
return render_template(
- f"llm_campaign.html",
+ f"pages/llm_campaign.html",
mode=mode,
llm_configs=llm_configs,
crowdsourcing_configs=crowdsourcing_configs,
@@ -595,30 +529,27 @@ def llm_campaign():
)
-@app.route("/llm_campaign/create", methods=["GET", "POST"])
+@app.route("/llm_eval/create", methods=["GET", "POST"])
+@app.route("/llm_gen/create", methods=["GET", "POST"])
@login_required
def llm_campaign_create():
- mode = request.args.get("mode")
-
- if not mode:
- return "The `mode` argument was not specified", 404
-
+ mode = utils.get_mode_from_path(request.path)
data = request.get_json()
campaign_id = slugify(data.get("campaignId"))
campaign_data = data.get("campaignData")
config = data.get("config")
- if mode == "llm_eval":
- config = utils.parse_llm_eval_config(config)
- elif mode == "llm_gen":
- config = utils.parse_llm_gen_config(config)
+ if mode == CampaignMode.LLM_EVAL:
+ config = llm_campaign.parse_llm_eval_config(config)
+ elif mode == CampaignMode.LLM_GEN:
+ config = llm_campaign.parse_llm_gen_config(config)
datasets = app.db["datasets_obj"]
try:
- utils.llm_campaign_new(mode, campaign_id, config, campaign_data, datasets)
- utils.load_campaign(app, campaign_id=campaign_id, mode=mode)
+ llm_campaign.create_llm_campaign(app, mode, campaign_id, config, campaign_data, datasets)
+ workflows.load_campaign(app, campaign_id=campaign_id)
except Exception as e:
traceback.print_exc()
return utils.error(f"Error while creating campaign: {e}")
@@ -626,25 +557,25 @@ def llm_campaign_create():
return utils.success()
-@app.route("/llm_campaign/detail/", methods=["GET", "POST"])
+@app.route("/llm_eval/detail/", methods=["GET", "POST"])
+@app.route("/llm_gen/detail/", methods=["GET", "POST"])
@login_required
def llm_campaign_detail(campaign_id):
- mode = request.args.get("mode")
-
- if not mode:
- return "The `mode` argument was not specified", 404
+ workflows.refresh_indexes(app)
- campaign = utils.load_campaign(app, campaign_id=campaign_id, mode=mode)
+ mode = utils.get_mode_from_path(request.path)
+ campaign = workflows.load_campaign(app, campaign_id=campaign_id)
if campaign.metadata["status"] == CampaignStatus.RUNNING and not app.db["announcers"].get(campaign_id):
campaign.metadata["status"] = CampaignStatus.IDLE
campaign.update_metadata()
overview = campaign.get_overview()
+
finished_examples = [x for x in overview if x["status"] == ExampleStatus.FINISHED]
return render_template(
- f"llm_campaign_detail.html",
+ f"pages/llm_campaign_detail.html",
mode=mode,
campaign_id=campaign_id,
overview=overview,
@@ -654,65 +585,69 @@ def llm_campaign_detail(campaign_id):
)
-@app.route("/llm_campaign/new", methods=["GET", "POST"])
+@app.route("/llm_eval/new", methods=["GET", "POST"])
+@app.route("/llm_gen/new", methods=["GET", "POST"])
@login_required
def llm_campaign_new():
- mode = request.args.get("mode")
+ mode = utils.get_mode_from_path(request.path)
- if not mode:
- return "The `mode` argument was not specified", 404
-
- datasets = utils.get_local_dataset_overview(app)
+ datasets = workflows.get_local_dataset_overview(app)
datasets = {k: v for k, v in datasets.items() if v["enabled"]}
- non_empty = True if mode == "llm_eval" else False
- model_outs = utils.get_model_outputs_overview(app, datasets, non_empty=non_empty)
+ if mode == CampaignMode.LLM_EVAL:
+ available_data = workflows.get_model_outputs_overview(app, datasets)
+ else:
+ available_data = workflows.get_available_data(app, datasets)
# get a list of available metrics
- llm_configs = utils.load_configs(mode=mode)
+ llm_configs = workflows.load_configs(mode=mode)
metric_types = list(ModelFactory.model_classes()[mode].keys())
- campaign_index = utils.generate_campaign_index(app, force_reload=False)
- default_campaign_id = utils.generate_default_id(campaign_index=campaign_index[mode], prefix=mode.replace("_", "-"))
+ default_campaign_id = workflows.generate_default_id(app, mode=mode, prefix=mode.replace("_", "-"))
+ default_prompts = utils.load_default_prompts()
return render_template(
- f"llm_campaign_new.html",
+ f"pages/llm_campaign_new.html",
mode=mode,
datasets=datasets,
default_campaign_id=default_campaign_id,
- model_outs=model_outs,
+ default_prompts=default_prompts,
+ available_data=available_data,
configs=llm_configs,
metric_types=metric_types,
host_prefix=app.config["host_prefix"],
)
-@app.route("/llm_campaign/run", methods=["POST"])
+@app.route("/llm_eval/run", methods=["POST"])
+@app.route("/llm_gen/run", methods=["POST"])
@login_required
def llm_campaign_run():
- mode = request.args.get("mode")
-
- if not mode:
- return "The `mode` argument was not specified", 404
-
+ mode = utils.get_mode_from_path(request.path)
data = request.get_json()
campaign_id = data.get("campaignId")
app.db["announcers"][campaign_id] = announcer = utils.MessageAnnouncer()
-
- app.db["threads"][campaign_id] = {
- "running": True,
- }
+ app.db["running_campaigns"].add(campaign_id)
try:
- campaign = utils.load_campaign(app, campaign_id=campaign_id, mode=mode)
- threads = app.db["threads"]
+ campaign = workflows.load_campaign(app, campaign_id=campaign_id)
datasets = app.db["datasets_obj"]
config = campaign.metadata["config"]
model = ModelFactory.from_config(config, mode=mode)
+ running_campaigns = app.db["running_campaigns"]
+
+ ret = llm_campaign.run_llm_campaign(
+ app, mode, campaign_id, announcer, campaign, datasets, model, running_campaigns
+ )
+
+ if hasattr(ret, "error"):
+ llm_campaign.pause_llm_campaign(app, campaign_id)
+ return utils.error(f"Error while running campaign: {ret.error}")
+ else:
+ return ret
- return utils.run_llm_campaign(mode, campaign_id, announcer, campaign, datasets, model, threads)
except Exception as e:
traceback.print_exc()
return utils.error(f"Error while running campaign: {e}")
@@ -722,20 +657,19 @@ def llm_campaign_run():
@login_required
def llm_campaign_update_config():
data = request.get_json()
- mode = request.args.get("mode")
campaign_id = data.get("campaignId")
config = data.get("config")
- config = utils.parse_campaign_config(config)
- campaign = utils.load_campaign(app, campaign_id=campaign_id, mode=mode)
+ config = llm_campaign.parse_campaign_config(config)
+ campaign = workflows.load_campaign(app, campaign_id=campaign_id)
campaign.metadata["config"] = config
campaign.update_metadata()
return utils.success()
-@app.route("/llm_campaign/progress/", methods=["GET"])
+@app.route("/llm_campaign/progress/", methods=["GET", "POST"])
@login_required
def listen(campaign_id):
if not app.db["announcers"].get(campaign_id):
@@ -753,60 +687,37 @@ def stream():
@app.route("/llm_campaign/pause", methods=["POST"])
@login_required
def llm_campaign_pause():
- mode = request.args.get("mode")
-
- if not mode:
- return "The `mode` argument was not specified", 404
-
data = request.get_json()
campaign_id = data.get("campaignId")
- app.db["threads"][campaign_id]["running"] = False
- campaign = utils.load_campaign(app, campaign_id=campaign_id, mode=mode)
- campaign.metadata["status"] = CampaignStatus.IDLE
- campaign.update_metadata()
+ llm_campaign.pause_llm_campaign(app, campaign_id)
- resp = jsonify(success=True, status=campaign.metadata["status"])
+ resp = jsonify(success=True, status=CampaignStatus.IDLE)
return resp
-@app.route("/llm_eval/detail/", methods=["GET", "POST"])
-@login_required
-def llm_eval(campaign_id):
-
- # redirect to /llm_campaign with the mode set to llm_eval, keeping the campaign_id
- return redirect(f"{app.config['host_prefix']}/llm_campaign/detail/{campaign_id}?mode=llm_eval")
-
-
-@app.route("/llm_gen/detail/", methods=["GET", "POST"])
-@login_required
-def llm_gen(campaign_id):
- # redirect to /llm_campaign with the mode set to llm_gen, keeping the campaign_id
- return redirect(f"{app.config['host_prefix']}/llm_campaign/detail/{campaign_id}?mode=llm_gen")
-
-
@app.route("/manage", methods=["GET", "POST"])
@login_required
def manage():
- datasets = utils.get_local_dataset_overview(app)
- dataset_classes = list(get_dataset_classes().keys())
+ datasets = workflows.get_local_dataset_overview(app)
datasets_enabled = {k: v for k, v in datasets.items() if v["enabled"]}
- model_outputs = utils.get_model_outputs_overview(app, datasets_enabled)
+ model_outputs = workflows.get_model_outputs_overview(app, datasets_enabled)
- datasets_for_download = utils.get_datasets_for_download(app)
+ resources = utils.load_resources_config()
# set as `downloaded` the datasets that are already downloaded
- for dataset_id in datasets_for_download.keys():
- datasets_for_download[dataset_id]["downloaded"] = dataset_id in datasets
+ for dataset_id in resources.keys():
+ resources[dataset_id]["downloaded"] = dataset_id in datasets
- campaigns = utils.get_sorted_campaign_list(app, sources=["crowdsourcing", "llm_eval", "llm_gen", "external"])
+ campaigns = workflows.get_sorted_campaign_list(
+ app, modes=[CampaignMode.CROWDSOURCING, CampaignMode.LLM_EVAL, CampaignMode.LLM_GEN, CampaignMode.EXTERNAL]
+ )
return render_template(
- "manage.html",
+ "pages/manage.html",
datasets=datasets,
- dataset_classes=dataset_classes,
- datasets_for_download=datasets_for_download,
+ resources=resources,
host_prefix=app.config["host_prefix"],
model_outputs=model_outputs,
campaigns=campaigns,
@@ -820,14 +731,14 @@ def save_config():
config = data.get("config")
mode = data.get("mode")
- if mode == "llm_eval":
- config = utils.parse_llm_eval_config(config)
- elif mode == "llm_gen":
- config = utils.parse_llm_gen_config(config)
- elif mode == "crowdsourcing":
- config = utils.parse_crowdsourcing_config(config)
+ if mode == CampaignMode.LLM_EVAL:
+ config = llm_campaign.parse_llm_eval_config(config)
+ elif mode == CampaignMode.LLM_GEN:
+ config = llm_campaign.parse_llm_gen_config(config)
+ elif mode == CampaignMode.CROWDSOURCING:
+ config = crowdsourcing.parse_crowdsourcing_config(config)
else:
- return jsonify({"error": f"Invalid mode: {mode}"})
+ return utils.error(f"Invalid mode: {mode}")
utils.save_config(filename, config, mode=mode)
@@ -841,55 +752,21 @@ def save_generation_outputs():
campaign_id = data.get("campaignId")
model_name = slugify(data.get("modelName"))
- utils.save_generation_outputs(app, campaign_id, model_name)
+ llm_campaign.save_generation_outputs(app, campaign_id, model_name)
return utils.success()
@app.route("/submit_annotations", methods=["POST"])
def submit_annotations():
- logger.info(f"Received annotations")
data = request.get_json()
campaign_id = data["campaign_id"]
annotation_set = data["annotation_set"]
annotator_id = data["annotator_id"]
- now = int(time.time())
-
- save_dir = os.path.join(ANNOTATIONS_DIR, campaign_id, "files")
- os.makedirs(save_dir, exist_ok=True)
- campaign = utils.load_campaign(app, campaign_id=campaign_id, mode="crowdsourcing")
-
- with app.db["lock"]:
- db = campaign.db
- batch_idx = annotation_set[0]["batch_idx"]
-
- # if the batch is not assigned to this annotator, return an error
- batch_annotator_id = db.loc[db["batch_idx"] == batch_idx, "annotator_id"].iloc[0]
- if batch_annotator_id != annotator_id and annotator_id != PREVIEW_STUDY_ID:
- logger.info(
- f"Annotations rejected: batch {batch_idx} in {campaign_id} not assigned to annotator {annotator_id}"
- )
- return utils.error(f"Batch not assigned to annotator {annotator_id}")
+ logger.info(f"Received annotations for {campaign_id} by {annotator_id}")
- with open(os.path.join(save_dir, f"{batch_idx}-{annotator_id}-{now}.jsonl"), "w") as f:
- for row in annotation_set:
- f.write(json.dumps(row) + "\n")
-
- db.loc[db["batch_idx"] == batch_idx, "status"] = ExampleStatus.FINISHED
- db.loc[db["batch_idx"] == batch_idx, "end"] = now
-
- campaign.update_db(db)
- logger.info(f"Annotations for {campaign_id} (batch {batch_idx}) saved")
-
- final_message_html = markdown.markdown(campaign.metadata["config"]["final_message"])
-
- if annotator_id == PREVIEW_STUDY_ID:
- preview_message = f'You are in a preview mode. Click here to go back to the campaign view. This message will not be displayed to the annotators.'
-
- return utils.success(message=final_message_html + preview_message)
-
- return utils.success(message=final_message_html)
+ return crowdsourcing.save_annotations(app, campaign_id, annotation_set, annotator_id)
@app.route("/set_dataset_enabled", methods=["POST"])
@@ -899,26 +776,39 @@ def set_dataset_enabled():
dataset_id = data.get("datasetId")
enabled = data.get("enabled")
- utils.set_dataset_enabled(app, dataset_id, enabled)
+ workflows.set_dataset_enabled(app, dataset_id, enabled)
return utils.success()
+@app.route("/update_config", methods=["POST"])
+@login_required
+def update_config():
+ try:
+ data = request.get_json()
+ app.config.update(data)
+ utils.save_app_config(data)
+ return utils.success()
+ except Exception as e:
+ traceback.print_exc()
+ return utils.error(f"Error while updating config: {e.__class__.__name__}: {e}")
+
+
@app.route("/upload_dataset", methods=["POST"])
@login_required
def upload_dataset():
data = request.get_json()
- dataset_id = data.get("id")
+ dataset_id = slugify(data.get("name"))
+ dataset_name = data.get("name")
dataset_description = data.get("description")
dataset_format = data.get("format")
dataset_data = data.get("dataset")
- # Process each file in the dataset
try:
- utils.upload_dataset(app, dataset_id, dataset_description, dataset_format, dataset_data)
+ workflows.upload_dataset(app, dataset_id, dataset_name, dataset_description, dataset_format, dataset_data)
except Exception as e:
traceback.print_exc()
- return jsonify({"error": f"Error while uploading dataset: {e}"})
+ return utils.error(f"Error while uploading dataset: {e}")
return utils.success()
@@ -936,9 +826,9 @@ def upload_model_outputs():
dataset = app.db["datasets_obj"][dataset_id]
try:
- utils.upload_model_outputs(dataset, split, setup_id, model_outputs)
+ workflows.upload_model_outputs(dataset, split, setup_id, model_outputs)
except Exception as e:
traceback.print_exc()
- return jsonify({"error": f"Error while adding model outputs: {e}"})
+ return utils.error(f"Error while adding model outputs: {e}")
return utils.success()
diff --git a/factgenie/bin/__init__.py b/factgenie/bin/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/factgenie/bin/run.py b/factgenie/bin/run.py
new file mode 100644
index 00000000..af275e13
--- /dev/null
+++ b/factgenie/bin/run.py
@@ -0,0 +1,358 @@
+#!/usr/bin/env python3
+
+# The run.py module is the CLI entry point.
+# The local imports in individual functions make the CLI start much faster.
+# Use them as much as possible and keep the imports at the top of this file to a minimum.
+import click
+from flask.cli import FlaskGroup
+from factgenie.app import app
+from factgenie.campaign import CampaignMode # required because of the click args choices
+
+
+def list_datasets(app):
+ """List locally available datasets."""
+ from factgenie.workflows import get_local_dataset_overview
+
+ dataset_overview = get_local_dataset_overview(app)
+
+ for dataset_id in dataset_overview:
+ print(dataset_id)
+
+
+def list_downloadable(app):
+ from factgenie import workflows, utils
+
+ datasets = workflows.get_local_dataset_overview(app)
+
+ resources = utils.load_resources_config()
+
+ # set as `downloaded` the datasets that are already downloaded
+ for dataset_id in resources.keys():
+ resources[dataset_id]["downloaded"] = dataset_id in datasets
+
+ for dataset_id, dataset_info in resources.items():
+ print(f"{dataset_id} - downloaded: {dataset_info['downloaded']}")
+
+
+def list_outputs(app):
+ """List all available outputs."""
+ from factgenie.workflows import get_model_outputs_overview
+
+ model_outputs = get_model_outputs_overview(app, datasets=None)
+
+ max_dataset_len = max(len(combination["dataset"]) for combination in model_outputs) + 2
+ max_split_len = max(len(combination["split"]) for combination in model_outputs) + 2
+ max_setup_id_len = max(len(combination["setup_id"]) for combination in model_outputs) + 2
+ max_output_ids_len = max(len(str(len(combination["output_ids"]))) for combination in model_outputs) + 2
+
+ # Print the header with computed lengths
+ print(
+ f"{'Dataset':>{max_dataset_len}} {'Split':>{max_split_len}} {'Setup ID':>{max_setup_id_len}} {'# Outputs':>{max_output_ids_len}}"
+ )
+ print("-" * (max_dataset_len + max_split_len + max_setup_id_len + max_output_ids_len + 3))
+
+ # Print each combination with computed lengths
+ for combination in model_outputs:
+ print(
+ f"{combination['dataset']:>{max_dataset_len}} {combination['split']:>{max_split_len}} {combination['setup_id']:>{max_setup_id_len}}"
+ f" {len(combination['output_ids']):>{max_output_ids_len}}"
+ )
+
+
+def list_campaigns(app):
+ """List all available campaigns."""
+ from factgenie.workflows import get_sorted_campaign_list
+ from pprint import pprint as pp
+
+ campaigns = get_sorted_campaign_list(
+ app, modes=[CampaignMode.CROWDSOURCING, CampaignMode.LLM_EVAL, CampaignMode.LLM_GEN, CampaignMode.EXTERNAL]
+ )
+
+ for campaign_id in campaigns.keys():
+ print(campaign_id)
+
+
+@app.cli.command("list")
+@click.argument("output", type=click.Choice(["datasets", "outputs", "campaigns", "downloadable"]))
+def list_data(output: str):
+ """List available data."""
+ if output == "datasets":
+ list_datasets(app)
+ elif output == "outputs":
+ list_outputs(app)
+ elif output == "campaigns":
+ list_campaigns(app)
+ elif output == "downloadable":
+ list_downloadable(app)
+ else:
+ click.echo(list_data.get_help(click.Context(list_data)))
+
+
+def show_dataset_info(app, dataset_id: str):
+ """Show information about a dataset."""
+
+ from factgenie.workflows import get_local_dataset_overview
+
+ dataset_overview = get_local_dataset_overview(app)
+ dataset_info = dataset_overview.get(dataset_id)
+
+ if dataset_info is None:
+ print(f"Dataset {dataset_id} not found.")
+ return
+
+ print(f"{'id:':>15} {dataset_id}")
+
+ for key, value in dataset_info.items():
+ print(f"{key:>15}: {value}")
+
+
+def show_campaign_info(app, campaign_id: str):
+ """Show information about a campaign."""
+ from factgenie.workflows import load_campaign
+ from pprint import pprint as pp
+
+ campaign = load_campaign(app, campaign_id)
+
+ if campaign is None:
+ print(f"Campaign {campaign_id} not found.")
+ return
+
+ pp({"metadata": campaign.metadata, "stats": campaign.get_stats()})
+
+
+@app.cli.command("info")
+@click.option("-d", "--dataset", type=str, help="Show information about a dataset.")
+@click.option("-c", "--campaign", type=str, help="Show information about a campaign.")
+def info(dataset: str, campaign: str):
+ """Show information about a dataset or campaign."""
+ if dataset:
+ show_dataset_info(app, dataset)
+ elif campaign:
+ show_campaign_info(app, campaign)
+ else:
+ click.echo(info.get_help(click.Context(info)))
+
+
+@app.cli.command("download")
+@click.option(
+ "-d",
+ "--dataset_id",
+ type=str,
+ help=(
+ "Download dataset input data. "
+ "Factgenie does not use references so the inputs define the datasets. "
+ "If the dataset class defines model outputs and annotations we download them too."
+ ),
+)
+def download_data(dataset_id: str):
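+    """Download a dataset (and any model outputs and annotations its class defines)."""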
+ import factgenie.workflows as workflows
+
+ if dataset_id:
+ workflows.download_dataset(app, dataset_id)
+ else:
+        click.echo(download_data.get_help(click.Context(download_data)))
+
+
+@app.cli.command("create_llm_campaign")
+@click.argument(
+ "campaign_id",
+ type=str,
+)
+@click.option("-d", "--dataset_ids", required=True, type=str, help="Comma separated dataset identifiers.")
+@click.option("-s", "--splits", required=True, type=str, help="Comma separated setups.")
+@click.option("-o", "--setup_ids", type=str, help="Comma separated setup ids.")
+@click.option("-m", "--mode", required=True, type=click.Choice([CampaignMode.LLM_EVAL, CampaignMode.LLM_GEN]))
+@click.option(
+ "-c",
+ "--config_file",
+ required=True,
+ type=str,
+ help="Path to the YAML configuration file / name of an existing config (without file suffix).",
+)
+@click.option("-f", "--overwrite", is_flag=True, default=False, help="Overwrite existing campaign if it exists.")
+def create_llm_campaign(
+ campaign_id: str, dataset_ids: str, splits: str, setup_ids: str, mode: str, config_file: str, overwrite: bool
+):
+ """Create a new LLM campaign."""
+ import yaml
+ from slugify import slugify
+ from factgenie.workflows import load_campaign, get_sorted_campaign_list
+ from factgenie import workflows, llm_campaign
+ from pathlib import Path
+ from pprint import pprint as pp
+
+ if mode == CampaignMode.LLM_EVAL and not setup_ids:
+ raise ValueError("The `setup_id` argument is required for llm_eval mode.")
+
+ campaigns = get_sorted_campaign_list(
+ app, modes=[CampaignMode.CROWDSOURCING, CampaignMode.LLM_EVAL, CampaignMode.LLM_GEN, CampaignMode.EXTERNAL]
+ )
+ if campaign_id in campaigns and not overwrite:
+ raise ValueError(f"Campaign {campaign_id} already exists. Use --overwrite to overwrite.")
+
+ campaign_id = slugify(campaign_id)
+ datasets = app.db["datasets_obj"]
+ dataset_ids = dataset_ids.split(",")
+ splits = splits.split(",")
+    setup_ids = setup_ids.split(",") if setup_ids else [None]
+
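+    # build the cartesian product of the requested datasets, splits and setup ids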
+ combinations = [
+ (dataset_id, split, setup_id) for dataset_id in dataset_ids for split in splits for setup_id in setup_ids
+ ]
+ dataset_overview = workflows.get_local_dataset_overview(app)
+ if mode == CampaignMode.LLM_EVAL:
+ available_data = workflows.get_model_outputs_overview(app, dataset_overview)
+ elif mode == CampaignMode.LLM_GEN:
+ available_data = workflows.get_available_data(app, dataset_overview)
+
+ # drop the `output_ids` key from the available_data
+ campaign_data = []
+
+ for c in combinations:
+ for data in available_data:
+ if (
+ c[0] == data["dataset"]
+ and c[1] == data["split"]
+ and (mode == CampaignMode.LLM_GEN or c[2] == data["setup_id"])
+ ):
+ data.pop("output_ids")
+ campaign_data.append(data)
+
+ if not campaign_data:
+ raise ValueError("No valid data combinations found.")
+
+ print(f"Available data combinations:")
+ pp(campaign_data)
+ print("-" * 80)
+ print()
+
+ # if config_file is a path, load the config from the path
+ if Path(config_file).exists():
+ with open(config_file) as f:
+ config = yaml.safe_load(f)
+ else:
+ if not config_file.endswith(".yaml"):
+ config_file = f"{config_file}.yaml"
+
+ configs = workflows.load_configs(mode)
+ config = configs.get(config_file)
+
+ if not config:
+ config_names = [Path(x).stem for x in configs.keys()]
+ raise ValueError(f"Config {config_file} not found. Available configs: {config_names}")
+
+ llm_campaign.create_llm_campaign(app, mode, campaign_id, config, campaign_data, datasets, overwrite=overwrite)
+
+ print(f"Created campaign {campaign_id}")
+
+
+@app.cli.command("run_llm_campaign")
+@click.argument("campaign_id", type=str)
+def run_llm_campaign(campaign_id: str):
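+    """Run an existing LLM campaign from the command line."""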
+ from factgenie.models import ModelFactory
+ from factgenie import llm_campaign
+ from factgenie.campaign import CampaignStatus
+ from factgenie.workflows import load_campaign
+
+    # mockup object (there is no web client to notify when running from the CLI)
+    announcer = None
+
+ datasets = app.db["datasets_obj"]
+ campaign = load_campaign(app, campaign_id)
+
+ if campaign is None:
+ raise ValueError(f"Campaign {campaign_id} not found.")
+
+ if campaign.metadata["status"] == CampaignStatus.FINISHED:
+ raise ValueError(f"Campaign {campaign_id} is already finished.")
+
+ if campaign.metadata["status"] == CampaignStatus.RUNNING:
+ raise ValueError(f"Campaign {campaign_id} is already running.")
+
+ config = campaign.metadata["config"]
+ mode = campaign.metadata["mode"]
+ model = ModelFactory.from_config(config, mode=mode)
+ running_campaigns = app.db["running_campaigns"]
+
+ app.db["running_campaigns"].add(campaign_id)
+
+ return llm_campaign.run_llm_campaign(
+ app, mode, campaign_id, announcer, campaign, datasets, model, running_campaigns
+ )
+
+
+def create_app(**kwargs):
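+    """Create and configure the Flask application."""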
+ import yaml
+ import logging
+ import coloredlogs
+ import os
+ import shutil
+ import factgenie.workflows as workflows
+ from apscheduler.schedulers.background import BackgroundScheduler
+ from factgenie.utils import check_login
+ from factgenie import ROOT_DIR, MAIN_CONFIG_PATH, MAIN_CONFIG_TEMPLATE_PATH, CAMPAIGN_DIR, INPUT_DIR, OUTPUT_DIR
+
+ file_handler = logging.FileHandler("error.log")
+ file_handler.setLevel(logging.ERROR)
+
+ if not MAIN_CONFIG_PATH.exists():
+ print("Activating the default configuration.")
+ shutil.copy(MAIN_CONFIG_TEMPLATE_PATH, MAIN_CONFIG_PATH)
+
+ with open(MAIN_CONFIG_PATH) as f:
+ config = yaml.safe_load(f)
+
+ logging_level = config.get("logging", {}).get("level", "INFO")
+ logging.basicConfig(
+ format="%(levelname)s (%(filename)s:%(lineno)d) - %(message)s",
+ level=logging_level,
+ handlers=[file_handler, logging.StreamHandler()],
+ )
+ logger = logging.getLogger(__name__)
+ coloredlogs.install(
+ level=logging_level,
+ logger=logger,
+ fmt="%(asctime)s %(levelname)s %(filename)s:%(lineno)d %(message)s",
+ )
+
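+    # environment variables take precedence over the values from the config file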
+ config["host_prefix"] = os.getenv("FACTGENIE_HOST_PREFIX", config["host_prefix"])
+ config["login"]["active"] = os.getenv("FACTGENIE_LOGIN_ACTIVE", config["login"]["active"])
+ config["login"]["username"] = os.getenv("FACTGENIE_LOGIN_USERNAME", config["login"]["username"])
+ config["login"]["password"] = os.getenv("FACTGENIE_LOGIN_PASSWORD", config["login"]["password"])
+
+ os.makedirs(CAMPAIGN_DIR, exist_ok=True)
+ os.makedirs(INPUT_DIR, exist_ok=True)
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+ app.config["root_dir"] = ROOT_DIR
+ app.config.update(config)
+
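+    # sanity check: the configured credentials must pass the login check and dummy credentials must fail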
+ assert check_login(
+ app, config["login"]["username"], config["login"]["password"]
+ ), "Login should pass for valid user"
+ assert not check_login(app, "dummy_non_user_name", "dummy_bad_password"), "Login should fail for dummy user"
+
+ app.db["datasets_obj"] = workflows.instantiate_datasets()
+ app.db["scheduler"] = BackgroundScheduler()
+
+ logging.getLogger("apscheduler.scheduler").setLevel(logging.WARNING)
+ logging.getLogger("apscheduler.executors.default").setLevel(logging.WARNING)
+ app.db["scheduler"].start()
+
+ workflows.generate_campaign_index(app)
+
+ if config.get("logging", {}).get("flask_debug", False) is False:
+ logging.getLogger("werkzeug").disabled = True
+
+ logger.info("Application ready")
+ app.config.update(SECRET_KEY=os.urandom(24))
+
+ return app
+
+
+@click.group(cls=FlaskGroup, create_app=create_app)
+def run():
+ pass
+
+
+if __name__ == "__main__":
+ app = create_app()
+ app.run(debug=False)
diff --git a/factgenie/campaigns.py b/factgenie/campaign.py
old mode 100755
new mode 100644
similarity index 59%
rename from factgenie/campaigns.py
rename to factgenie/campaign.py
index bf14e881..1e6fdd80
--- a/factgenie/campaigns.py
+++ b/factgenie/campaign.py
@@ -5,18 +5,19 @@
import logging
import pandas as pd
import ast
-import coloredlogs
from datetime import datetime
-from pathlib import Path
+from factgenie import CAMPAIGN_DIR
logger = logging.getLogger(__name__)
-# coloredlogs.install(level="INFO", logger=logger, fmt="%(asctime)s %(levelname)s %(message)s")
-DIR_PATH = os.path.dirname(__file__)
-ANNOTATIONS_DIR = os.path.join(DIR_PATH, "annotations")
-GENERATIONS_DIR = os.path.join(DIR_PATH, "generations")
+class CampaignMode:
+ CROWDSOURCING = "crowdsourcing"
+ LLM_EVAL = "llm_eval"
+ LLM_GEN = "llm_gen"
+ EXTERNAL = "external"
+ HIDDEN = "hidden"
class CampaignStatus:
@@ -36,29 +37,15 @@ class Campaign:
def get_name(cls):
return cls.__name__
- @classmethod
- def get_main_dir(cls):
- return ANNOTATIONS_DIR
-
def __init__(self, campaign_id):
self.campaign_id = campaign_id
- self.dir = os.path.join(self.__class__.get_main_dir(), campaign_id)
+ self.dir = os.path.join(CAMPAIGN_DIR, campaign_id)
self.db_path = os.path.join(self.dir, "db.csv")
self.metadata_path = os.path.join(self.dir, "metadata.json")
self.load_db()
self.load_metadata()
- # temporary fix for the old campaigns
- if self.metadata.get("status") in ["new", "paused"]:
- self.metadata["status"] = CampaignStatus.IDLE
- self.update_metadata()
-
- # if the db does not contain the `end` column, add it
- if "end" not in self.db.columns:
- self.db["end"] = ""
- self.update_db(self.db)
-
def get_finished_examples(self):
# load all the JSONL files in the "files" subdirectory
examples_finished = []
@@ -97,19 +84,17 @@ def clear_all_outputs(self):
self.db["status"] = ExampleStatus.FREE
self.db["annotator_id"] = ""
self.db["start"] = None
+ self.db["end"] = None
self.update_db(self.db)
self.metadata["status"] = CampaignStatus.IDLE
self.update_metadata()
- def clear_single_output(self, idx, idx_type="example_idx"):
- # Identify the rows where idx_type matches idx
- mask = self.db[idx_type] == idx
-
- # Update the DataFrame using .loc
- self.db.loc[mask, "status"] = ExampleStatus.FREE
- self.db.loc[mask, "annotator_id"] = ""
- self.db.loc[mask, "start"] = None
+ def clear_output_by_idx(self, db_idx):
+ self.db.loc[db_idx, "status"] = ExampleStatus.FREE
+ self.db.loc[db_idx, "annotator_id"] = ""
+ self.db.loc[db_idx, "start"] = None
+ self.db.loc[db_idx, "end"] = None
self.update_db(self.db)
@@ -117,13 +102,11 @@ def clear_single_output(self, idx, idx_type="example_idx"):
self.metadata["status"] = CampaignStatus.IDLE
self.update_metadata()
- logger.info(f"Cleared outputs and assignments for {idx}")
-
# remove any outputs from JSONL files
- dataset = self.db.loc[mask, "dataset"].values[0]
- split = self.db.loc[mask, "split"].values[0]
- setup_id = self.db.loc[mask, "setup_id"].values[0]
- example_idx = self.db.loc[mask, idx_type].values[0]
+ dataset = self.db.loc[db_idx, "dataset"]
+ split = self.db.loc[db_idx, "split"]
+ setup_id = self.db.loc[db_idx, "setup_id"]
+ example_idx = self.db.loc[db_idx, "example_idx"]
for jsonl_file in glob.glob(os.path.join(self.dir, "files/*.jsonl")):
with open(jsonl_file, "r") as f:
@@ -136,10 +119,13 @@ def clear_single_output(self, idx, idx_type="example_idx"):
data["dataset"] == dataset
and data["split"] == split
and data["setup_id"] == setup_id
- and data[idx_type] == example_idx
+ and data["example_idx"] == example_idx
+ and data["metadata"].get("annotator_group", 0) == self.db.loc[db_idx, "annotator_group"]
):
f.write(line)
+ logger.info(f"Cleared outputs and assignments for {db_idx}")
+
class ExternalCampaign(Campaign):
def get_stats(self):
@@ -163,59 +149,12 @@ def check_idle_time(self):
> self.metadata["config"]["idle_time"] * 60
):
logger.info(f"Freeing example {example.example_idx} for {self.campaign_id} due to idle time")
- self.clear_single_output(example.example_idx)
-
- def get_examples_for_batch(self, batch_idx):
- annotator_batch = []
-
- # find all examples for this batch in self.db
- batch_examples = self.db[self.db["batch_idx"] == batch_idx]
-
- for _, row in batch_examples.iterrows():
- annotator_batch.append(
- {
- "dataset": row["dataset"],
- "split": row["split"],
- "setup_id": row["setup_id"],
- "example_idx": row["example_idx"],
- "annotator_group": row["annotator_group"],
- }
- )
- return annotator_batch
-
- def get_overview(self):
- self.load_db()
- overview_db = self.db.copy()
- # replace NaN with empty string
- overview_db = overview_db.where(pd.notnull(overview_db), "")
-
- # group by batch idx
- # add a column with the number of examples for each batch
- # for other columns keep first item
- overview_db = overview_db.groupby("batch_idx").agg(
- {
- "dataset": "first",
- "split": "first",
- "example_idx": "count",
- "setup_id": "first",
- "status": "first",
- "start": "first",
- "end": "first",
- "annotator_id": "first",
- "annotator_group": "first",
- }
- )
-
- overview_db["example_details"] = overview_db.index.map(lambda batch_idx: self.get_examples_for_batch(batch_idx))
-
- overview_db = overview_db.rename(columns={"example_idx": "example_cnt"}).reset_index()
- overview_db = overview_db.to_dict(orient="records")
-
- return overview_db
+ db_index = example.name
+ self.clear_output_by_idx(db_index)
def get_stats(self):
# group by batch_idx, keep the first row of each group
- batch_stats = self.db.groupby("batch_idx").first()
+ batch_stats = self.db.groupby(["batch_idx", "annotator_group"]).first()
return {
"total": len(batch_stats),
@@ -224,8 +163,47 @@ def get_stats(self):
"free": len(batch_stats[batch_stats["status"] == ExampleStatus.FREE]),
}
- def clear_output(self, idx):
- self.clear_single_output(idx, idx_type="batch_idx")
+ def clear_output(self, idx, annotator_group):
+ self.load_db()
+ examples_for_batch = self.db[(self.db["batch_idx"] == idx) & (self.db["annotator_group"] == annotator_group)]
+
+ for _, example in examples_for_batch.iterrows():
+ db_index = example.name
+ self.clear_output_by_idx(db_index)
+
+ def get_overview(self):
+ self.load_db()
+ df = self.db.copy()
+ # replace NaN with empty string
+ df = df.where(pd.notnull(df), "")
+
+ # Group by batch_idx and annotator_group
+ grouped = df.groupby(["batch_idx", "annotator_group"])
+
+ # Aggregate the necessary columns
+ overview_df = grouped.agg(
+ example_list=pd.NamedAgg(
+ column="example_idx",
+ aggfunc=lambda x: x.index.map(
+ lambda idx: {
+ "dataset": df.at[idx, "dataset"],
+ "split": df.at[idx, "split"],
+ "setup_id": df.at[idx, "setup_id"],
+ "example_idx": df.at[idx, "example_idx"],
+ }
+ ).tolist(),
+ ),
+ example_cnt=pd.NamedAgg(column="example_idx", aggfunc="count"),
+ status=pd.NamedAgg(column="status", aggfunc="first"),
+ annotator_id=pd.NamedAgg(column="annotator_id", aggfunc="first"),
+ start=pd.NamedAgg(column="start", aggfunc="first"),
+ end=pd.NamedAgg(column="end", aggfunc="first"),
+ ).reset_index()
+
+ for col in ["status", "annotator_id", "start", "end"]:
+ overview_df[col] = overview_df[col].astype(df[col].dtype)
+
+ return overview_df.to_dict(orient="records")
class LLMCampaign(Campaign):
@@ -236,32 +214,31 @@ def get_stats(self):
"free": len(self.db[self.db["status"] == ExampleStatus.FREE]),
}
- def clear_output(self, idx):
- self.clear_single_output(idx, idx_type="example_idx")
+ def clear_output(self, idx, annotator_group):
+ example_row = self.db[(self.db["example_idx"] == idx) & (self.db["annotator_group"] == annotator_group)].iloc[0]
+ db_idx = example_row.name
+ self.clear_output_by_idx(db_idx)
class LLMCampaignEval(LLMCampaign):
def get_overview(self):
- # pair the examples in db with the finished examples
- # we need to match the examples on (dataset, split, setup, example_idx)
- # add the annotations to the df
+ self.load_db()
+ overview_db = self.db.copy()
+ overview_db["output"] = ""
# get the finished examples
finished_examples = self.get_finished_examples()
example_index = {
(ex["dataset"], ex["split"], ex["setup_id"], ex["example_idx"]): str(ex) for ex in finished_examples
}
-
- self.load_db()
- overview_db = self.db.copy()
- overview_db["output"] = ""
+ overview_db["record"] = {}
for i, row in self.db.iterrows():
key = (row["dataset"], row["split"], row["setup_id"], row["example_idx"])
example = ast.literal_eval(example_index.get(key, "{}"))
annotations = example.get("annotations", [])
- overview_db.at[i, "output"] = str(annotations)
+ overview_db.at[i, "record"] = str(annotations)
overview_db = overview_db.to_dict(orient="records")
@@ -269,10 +246,7 @@ def get_overview(self):
class LLMCampaignGen(LLMCampaign):
- @classmethod
- def get_main_dir(cls):
- return GENERATIONS_DIR
-
+ # Enables showing the generated outputs on the campaign detail page even though the outputs are not yet exported
def get_overview(self):
finished_examples = self.get_finished_examples()
@@ -280,13 +254,13 @@ def get_overview(self):
self.load_db()
overview_db = self.db.copy()
- overview_db["output"] = ""
+ overview_db["record"] = ""
for i, row in self.db.iterrows():
key = (row["dataset"], row["split"], row["example_idx"])
example = ast.literal_eval(example_index.get(key, "{}"))
- overview_db.at[i, "output"] = str(example.get("out", ""))
+ overview_db.at[i, "record"] = str(example.get("output", ""))
overview_db = overview_db.to_dict(orient="records")
return overview_db
diff --git a/factgenie/generations/.gitignore b/factgenie/campaigns/.gitignore
similarity index 100%
rename from factgenie/generations/.gitignore
rename to factgenie/campaigns/.gitignore
diff --git a/factgenie/cli.py b/factgenie/cli.py
deleted file mode 100755
index 4daf742f..00000000
--- a/factgenie/cli.py
+++ /dev/null
@@ -1,139 +0,0 @@
-#!/usr/bin/env python3
-
-# The cli is CLI entry point.
-# The local imports in individual functions make CLI way faster.
-# Use them as much as possible and minimize imports at the top of the file.
-import click
-
-from flask.cli import FlaskGroup
-
-
-@click.command()
-def list_datasets():
- import yaml
- from factgenie import DATASET_CONFIG_PATH
-
- """List all available datasets."""
- with open(DATASET_CONFIG_PATH) as f:
- config = yaml.safe_load(f)
-
- for dataset_id, _ in config.items():
- print(dataset_id)
-
-
-@click.command()
-@click.option("--campaign_id", required=True, type=str)
-@click.option("--dataset_id", required=True, type=str)
-@click.option("--split", required=True, type=str)
-@click.option("--setup_id", type=str)
-@click.option("--mode", required=True, type=click.Choice(["llm_eval", "llm_gen"]))
-@click.option(
- "--llm_metric_config", required=True, type=str, help="Path to the metric config file or just the metric name."
-)
-@click.option("--overwrite", is_flag=True, default=False, help="Remove existing campaign if it exists.")
-def run_llm_campaign(
- campaign_id: str, dataset_id: str, split: str, setup_id: str, mode: str, llm_metric_config: str, overwrite: bool
-):
- """Runs the LLM campaign from CLI with no web server."""
- from slugify import slugify
- from factgenie import utils
- from factgenie.models import ModelFactory
-
- campaign_id = slugify(campaign_id)
- campaign_data = [{"dataset": dataset_id, "split": split, "setup_id": setup_id}]
-
- config = utils.load_dataset_config()
- dataset_config = config[dataset_id]
- datasets = {dataset_id: utils.instantiate_dataset(dataset_id, dataset_config)}
-
- if mode == "llm_eval" and not setup_id:
- raise ValueError("The `setup_id` argument is required for llm_eval mode.")
-
- configs = utils.load_configs(mode)
- metric_config = configs[llm_metric_config]
- campaign = utils.llm_campaign_new(mode, campaign_id, metric_config, campaign_data, datasets, overwrite=overwrite)
-
- # mockup objects useful for interactivity
- threads = {campaign_id: {"running": True}}
- announcer = None
-
- model = ModelFactory.from_config(metric_config, mode=mode)
-
- return utils.run_llm_campaign(mode, campaign_id, announcer, campaign, datasets, model, threads)
-
-
-def create_app(**kwargs):
- import yaml
- import logging
- import coloredlogs
- import os
- from factgenie.main import app
- from apscheduler.schedulers.background import BackgroundScheduler
-
- logger = logging.getLogger(__name__)
-
- file_handler = logging.FileHandler("error.log")
- file_handler.setLevel(logging.ERROR)
-
- logging.basicConfig(
- format="%(levelname)s (%(filename)s:%(lineno)d) - %(message)s",
- level=app.config.get("logging_level", "INFO"),
- handlers=[file_handler, logging.StreamHandler()],
- )
- logger = logging.getLogger(__name__)
- coloredlogs.install(
- level=app.config.get("logging_level", "INFO"),
- logger=logger,
- fmt="%(asctime)s %(levelname)s %(filename)s:%(lineno)d %(message)s",
- )
-
- from factgenie import ROOT_DIR, MAIN_CONFIG_PATH, GENERATIONS_DIR, ANNOTATIONS_DIR, DATA_DIR, OUTPUT_DIR
- from factgenie import utils
- from factgenie.utils import check_login, migrate
-
- # --- compatibility with older versions ---
- migrate()
- # --- end of compatibility with older versions ---
-
- with open(MAIN_CONFIG_PATH) as f:
- config = yaml.safe_load(f)
-
- os.makedirs(ANNOTATIONS_DIR, exist_ok=True)
- os.makedirs(GENERATIONS_DIR, exist_ok=True)
- os.makedirs(DATA_DIR, exist_ok=True)
- os.makedirs(OUTPUT_DIR, exist_ok=True)
-
- app.config.update(config)
- app.config["root_dir"] = ROOT_DIR
-
- assert check_login(
- app, config["login"]["username"], config["login"]["password"]
- ), "Login should pass for valid user"
- assert not check_login(app, "dummy_non_user_name", "dummy_bad_password"), "Login should fail for dummy user"
-
- app.db["datasets_obj"] = utils.instantiate_datasets()
- app.db["scheduler"] = BackgroundScheduler()
-
- logging.getLogger("apscheduler.scheduler").setLevel(logging.WARNING)
- logging.getLogger("apscheduler.executors.default").setLevel(logging.WARNING)
- app.db["scheduler"].start()
-
- utils.generate_campaign_index(app)
-
- if config["debug"] is False:
- logging.getLogger("werkzeug").disabled = True
-
- logger.info("Application ready")
-
- app.config.update(SECRET_KEY=os.urandom(24))
-
- # register CLI commands
- app.cli.add_command(run_llm_campaign)
- app.cli.add_command(list_datasets)
-
- return app
-
-
-@click.group(cls=FlaskGroup, create_app=create_app)
-def run():
- pass
diff --git a/factgenie/config/config_TEMPLATE.yml b/factgenie/config/config_TEMPLATE.yml
index 08d6b9d1..19ef640d 100644
--- a/factgenie/config/config_TEMPLATE.yml
+++ b/factgenie/config/config_TEMPLATE.yml
@@ -1,8 +1,9 @@
---
-debug: true
host_prefix: ""
-logging_level: INFO
+logging:
+ level: INFO
+ flask_debug: false
login:
- active: true
- username: "admin"
- password: "factgenie"
+ active: false
+ username: admin
+ password: factgenie
diff --git a/factgenie/config/crowdsourcing/example-tutorial.yaml b/factgenie/config/crowdsourcing/example-tutorial.yaml
new file mode 100644
index 00000000..023c202f
--- /dev/null
+++ b/factgenie/config/crowdsourcing/example-tutorial.yaml
@@ -0,0 +1,46 @@
+annotation_granularity: words
+annotation_span_categories:
+- color: '#d6d0f7'
+ name: NUMBER
+ description: "Incorrect number: It does not matter whether the number is spelled out or is in digits."
+- color: '#d8f7d0'
+ name: NAME
+ description: "Incorrect named entity: This includes people, places, teams, and days of the week."
+- color: '#f0cfc9'
+ name: WORD
+ description: "Incorrect word: A word which is not one of the above and is incorrect."
+- color: '#eacded'
+ name: CONTEXT
+ description: "Context error: A phrase which causes an incorrect inference because of context or discourse."
+- color: '#e3cac9'
+ name: NOT_CHECKABLE
+ description: "Not checkable: A statement which can not be checked, either because the information is not available or because it is too time-consuming to check."
+- color: '#cef3f7'
+ name: OTHER
+ description: "Other: Any other type of mistake."
+annotator_instructions: |-
+ In this task, you will annotate outputs of an automatic text generation system. For each example, you will see **data** from a basketball game on the left side and the corresponding generated **text** on the right side. Your task is to **annotate errors** in the text with respect to the data.
+
+ There are six types of errors that you can mark in the generated text:
+
+ 1. NAME (Incorrect named entity): This includes people, places, teams, and days of the week.
+ 2. NUMBER (Incorrect number): It does not matter whether the number is spelled out or is in digits.
+ 3. WORD (Incorrect word): A word which is not one of the above and is incorrect.
+ 4. NOT_CHECKABLE (Not checkable): A statement which can not be checked, either because the information is not available or because it is too time-consuming to check.
+ 5. CONTEXT (Context error): A phrase which causes an incorrect inference because of context or discourse.
+ 6. OTHER (Other): Any other type of mistake.
+
+ You can annotate the errors by selecting the appropriate error category and dragging your mouse over the text, highlighting the error span.
+
+ Once you think you have marked all the errors present in the text, click the **✅ Mark example as complete** button (you can still update the annotation later).
+
+  You will be able to submit the annotations once they are all marked as complete.
+service: local
+examples_per_batch: 5
+annotators_per_example: 1
+final_message: Your annotations have been submitted.
+flags: []
+has_display_overlay: true
+idle_time: 60
+options: []
+sort_order: shuffle-all
\ No newline at end of file
diff --git a/factgenie/config/crowdsourcing/example.yaml b/factgenie/config/crowdsourcing/example.yaml
index 8f3fc2dc..a47abbbb 100644
--- a/factgenie/config/crowdsourcing/example.yaml
+++ b/factgenie/config/crowdsourcing/example.yaml
@@ -1,14 +1,19 @@
annotation_span_categories:
- name: "Incorrect"
color: "#ffbcbc"
+ description: "The fact in the text contradicts the data."
- name: "Not checkable"
color: "#e9d2ff"
+ description: "The fact in the text cannot be checked given the data."
- name: "Misleading"
color: "#fff79f"
+ description: "The fact in the text is misleading in the given context."
- name: "Other"
color: "#bbbbbb"
+ description: "The text is problematic for another reason, e.g. grammatically or stylistically incorrect, irrelevant, or repetitive."
service: prolific
examples_per_batch: 10
+annotators_per_example: 1
annotation_granularity: words
sort_order: sort-example-ids-shuffle-setups
idle_time: 120
diff --git a/factgenie/config/datasets_TEMPLATE.yml b/factgenie/config/datasets_TEMPLATE.yml
deleted file mode 100644
index 672154f1..00000000
--- a/factgenie/config/datasets_TEMPLATE.yml
+++ /dev/null
@@ -1,14 +0,0 @@
-# ---
-
-# If you want to start adding local datasets manually, rename this template file to `datasets.yml`.
-
-# A dataset record looks like the following:
-# example-dataset-id:
-# class: module.Class
-# description: 'Description of the dataset (you can use HTML tags)'
-# enabled: true
-# splits:
-# - list
-# - of
-# - dataset
-# - splits
diff --git a/factgenie/config/default_prompts.yml b/factgenie/config/default_prompts.yml
new file mode 100644
index 00000000..5f200917
--- /dev/null
+++ b/factgenie/config/default_prompts.yml
@@ -0,0 +1,28 @@
+crowdsourcing: |
+ In this task, you will annotate textual outputs. For each example, you will see **inputs** on the left side and the corresponding **text** on the right side. Your task is to **highlight spans** in the text according to the instructions.
+
+ These are the span categories you can mark in the generated text:
+
+ {error_list}
+
+ You can annotate the errors by selecting the appropriate error category and dragging your mouse over the text, highlighting the error span.
+
+ Once you think you have marked all the errors present in the text, click the **✅ Mark example as complete** button (you can still update the annotation later).
+
+  You will be able to submit the annotations once they are all marked as complete.
+llm_eval: |
+ Given the data:
+ ```
+ {data}
+ ```
+ Annotate spans in the following text:
+ ```
+ {text}
+ ```
+ Instructions for annotating the text:
+
+ Output the errors as a JSON list "annotations" in which each object contains fields "reason", "text", and "annotation_type". The value of "reason" is the reason for the annotation. The value of "text" is the literal value of the text inside the highlighted span, so that the span can later be identified using string matching. The value of "annotation_type" is an integer index of the error based on the following list:
+
+ {error_list}
+
+ The list should be sorted by the position of the error in the text. Make sure that the annotations are not overlapping.
\ No newline at end of file
diff --git a/factgenie/config/llm-eval/ollama-llama3-eval.yaml b/factgenie/config/llm-eval/example-ollama-llama3-eval.yaml
similarity index 62%
rename from factgenie/config/llm-eval/ollama-llama3-eval.yaml
rename to factgenie/config/llm-eval/example-ollama-llama3-eval.yaml
index 870bf14d..3f44efda 100644
--- a/factgenie/config/llm-eval/ollama-llama3-eval.yaml
+++ b/factgenie/config/llm-eval/example-ollama-llama3-eval.yaml
@@ -1,5 +1,5 @@
type: ollama_metric
-model: llama3
+model: llama3.1:8b
# You can run ollama alson on other machine than factgenie
# e.g. we run it on a machine tdll-3gpu3 and access it from any machine which is withing the same firewall
# in that case we use api_url: http://tdll-3gpu3.ufal.hide.ms.mff.cuni.cz:11434/api/
@@ -14,12 +14,16 @@ model_args:
annotation_span_categories:
- name: "Incorrect"
color: "#ffbcbc"
+ description: "The fact in the text contradicts the data."
- name: "Not checkable"
color: "#e9d2ff"
+ description: "The fact in the text cannot be checked given the data."
- name: "Misleading"
color: "#fff79f"
+ description: "The fact in the text is misleading in the given context."
- name: "Other"
color: "#bbbbbb"
+ description: "The text is problematic for another reason, e.g. grammatically or stylistically incorrect, irrelevant, or repetitive."
prompt_template: |
Given the data:
```
@@ -29,7 +33,7 @@ prompt_template: |
```
{text}
```
- Output the errors as a JSON list "annotations" in which each object contains fields "reason", "text", and "type". The value of "text" is the text of the error. The value of "reason" is the reason for the error. The value of "type" is one of {{0, 1, 2, 3}} based on the following list:
+ Output the errors as a JSON list "annotations" in which each object contains fields "reason", "text", and "annotation_type". The value of "text" is the text of the error. The value of "reason" is the reason for the error. The value of "annotation_type" is one of {0, 1, 2, 3} based on the following list:
- 0: Incorrect fact: The fact in the text contradicts the data.
- 1: Not checkable: The fact in the text cannot be checked in the data.
- 2: Misleading: The fact in the text is misleading in the given context.
@@ -50,6 +54,6 @@ prompt_template: |
Nokia 3310 is produced in Finland and features a 320x320 display. It is available in black color. The data seem to provide only partial information about the phone.
```
output:
- ```{{ "annotations": [{{"reason": "The country where the phone is produced is not mentioned in the data.", "text": "produced in Finland", "type": 1}}, {{"reason": "The data mentions that the display has resolution 320x240px.", "text": "320x320", type: 0}}, {{"reason": "Misleadingly suggests that the phone is not available in other colors.", "text": "available in black color", type: 2}}, {{"reason": "The note is irrelevant for the phone description.", "text": "The data seem to provide only partial information about the phone.", type: 3}}] }}
+ ```{ "annotations": [{"reason": "The country where the phone is produced is not mentioned in the data.", "text": "produced in Finland", "annotation_type": 1}, {"reason": "The data mentions that the display has resolution 320x240px.", "text": "320x320", "annotation_type": 0}, {"reason": "Misleadingly suggests that the phone is not available in other colors.", "text": "available in black color", "annotation_type": 2}, {"reason": "The note is irrelevant for the phone description.", "text": "The data seem to provide only partial information about the phone.", "annotation_type": 3}] }
```
Note that some details may not be mentioned in the text: do not count omissions as errors. Also do not be too strict: some facts can be less specific than in the data (rounded values, shortened or abbreviated text, etc.), do not count these as errors. If there are no errors in the text, "annotations" will be an empty list.
diff --git a/factgenie/config/llm-eval/openai-gpt3.5-eval.yaml b/factgenie/config/llm-eval/example-openai-gpt-4o-mini-eval.yaml
similarity index 60%
rename from factgenie/config/llm-eval/openai-gpt3.5-eval.yaml
rename to factgenie/config/llm-eval/example-openai-gpt-4o-mini-eval.yaml
index c7b4aaa9..7f81d254 100644
--- a/factgenie/config/llm-eval/openai-gpt3.5-eval.yaml
+++ b/factgenie/config/llm-eval/example-openai-gpt-4o-mini-eval.yaml
@@ -3,15 +3,19 @@ model: gpt-4o-mini-2024-07-18
# model: gpt-3.5-turbo-1106
# model: "gpt-4-1106-preview"
system_msg: "You are an expert data-to-text error annotation system. You undestand structured data and you can correcly operate with units and numerical values. You are designed to output token-level annotations in JSON."
-annotation_span_categories:
+annotation_span_categories:
- name: "Incorrect"
color: "#ffbcbc"
+ description: "The fact in the text contradicts the data."
- name: "Not checkable"
color: "#e9d2ff"
+ description: "The fact in the text cannot be checked given the data."
- name: "Misleading"
color: "#fff79f"
+ description: "The fact in the text is misleading in the given context."
- name: "Other"
color: "#bbbbbb"
+ description: "The text is problematic for another reason, e.g. grammatically or stylistically incorrect, irrelevant, or repetitive."
prompt_template: |
Given the data:
```
@@ -21,7 +25,7 @@ prompt_template: |
```
{text}
```
- Output the errors as a JSON list "annotations" in which each object contains fields "reason", "text", and "type". The value of "text" is the text of the error. The value of "reason" is the reason for the error. The value of "type" is one of {{0, 1, 2, 3}} based on the following list:
+ Output the errors as a JSON list "annotations" in which each object contains fields "reason", "text", and "annotation_type". The value of "text" is the text of the error. The value of "reason" is the reason for the error. The value of "annotation_type" is one of {0, 1, 2, 3} based on the following list:
- 0: Incorrect fact: The fact in the text contradicts the data.
- 1: Not checkable: The fact in the text cannot be checked in the data.
- 2: Misleading: The fact in the text is misleading in the given context.
@@ -42,6 +46,6 @@ prompt_template: |
Nokia 3310 is produced in Finland and features a 320x320 display. It is available in black color. The data seem to provide only partial information about the phone.
```
output:
- ```{{ "annotations": [{{"reason": "The country where the phone is produced is not mentioned in the data.", "text": "produced in Finland", "type": 1}}, {{"reason": "The data mentions that the display has resolution 320x240px.", "text": "320x320", type: 0}}, {{"reason": "Misleadingly suggests that the phone is not available in other colors.", "text": "available in black color", type: 2}}, {{"reason": "The note is irrelevant for the phone description.", "text": "The data seem to provide only partial information about the phone.", type: 3}}] }}
+ ```{ "annotations": [{"reason": "The country where the phone is produced is not mentioned in the data.", "text": "produced in Finland", "annotation_type": 1}, {"reason": "The data mentions that the display has resolution 320x240px.", "text": "320x320", "annotation_type": 0}, {"reason": "Misleadingly suggests that the phone is not available in other colors.", "text": "available in black color", "annotation_type": 2}, {"reason": "The note is irrelevant for the phone description.", "text": "The data seem to provide only partial information about the phone.", "annotation_type": 3}] }
```
Note that some details may not be mentioned in the text: do not count omissions as errors. Also do not be too strict: some facts can be less specific than in the data (rounded values, shortened or abbreviated text, etc.), do not count these as errors. If there are no errors in the text, "annotations" will be an empty list.
diff --git a/factgenie/config/llm-eval/example-tutorial.yaml b/factgenie/config/llm-eval/example-tutorial.yaml
new file mode 100644
index 00000000..84a7f5da
--- /dev/null
+++ b/factgenie/config/llm-eval/example-tutorial.yaml
@@ -0,0 +1,108 @@
+type: openai_metric
+system_msg: You are an expert error annotation system. You understand structured data
+ and you can correctly operate with units and numerical values. You are designed to
+ output token-level annotations in JSON.
+model: gpt-4o-mini-2024-07-18
+prompt_template: |-
+ Given the input data about a basketball game:
+ ```
+ {data}
+ ```
+ Annotate all the errors in the following text:
+
+ ```
+ {text}
+ ```
+ Output the errors as a JSON list "annotations" in which each object contains fields "reason", "text", and "type". The value of "text" is the text of the error. The value of "reason" is the reason for the error. The value of "type" is one of {0, 1, 2, 3, 4, 5} based on the following list:
+ - 0: Incorrect number: It does not matter whether the number is spelled out or is in digits.
+ - 1: Incorrect named entity: This includes people, places, teams, and days of the week.
+ - 2: Incorrect word: A word which is not one of the above and is incorrect.
+ - 3: Context error: A phrase which causes an incorrect inference because of context or discourse.
+ - 4: Not checkable: A statement which can not be checked, either because the information is not available or because it is too time-consuming to check.
+ - 5: Other: Any other type of mistake.
+
+ The list should be sorted by the position of the error in the text.
+
+ *Example:*
+ data:
+ ```
+ ## NBA Game Report - 11_05_14
+
+ #### Game Summary: Memphis Grizzlies @ Phoenix Suns
+ | Team | Quarter 1 | Quarter 2 | Quarter 3 | Quarter 4 | Final |
+ | ----------------- | --------- | --------- | --------- | --------- | ----- |
+ | Memphis Grizzlies | 26 | 20 | 30 | 26 | 102 |
+ | Phoenix Suns | 27 | 25 | 19 | 20 | 91 |
+
+ #### Team Statistics
+ | Statistic | Memphis Grizzlies | Phoenix Suns |
+ | ---------------------- | ----------------- | ------------ |
+ | Field Goal Percentage | 50% | 46% |
+ | Three Point Percentage | 53% | 38% |
+ | Free Throw Percentage | 77% | 91% |
+ | Rebounds | 37 | 35 |
+ | Assists | 25 | 13 |
+ | Turnovers | 16 | 18 |
+
+ #### Phoenix Player Statistics
+ | Player | Minutes | Points | Rebounds | Assists | Field Goals | Three Pointers | Free Throws | Steals | Blocks | Turnovers |
+ | ---------------- | ------- | ------ | -------- | ------- | ----------- | -------------- | ----------- | ------ | ------ | --------- |
+ | Isaiah Thomas | 26 | 15 | 1 | 2 | 4/10 | 1/4 | 6/6 | 1 | 0 | 2 |
+ | Anthony Tolliver | 8 | 3 | 2 | 0 | 1/3 | 1/3 | 0/0 | 0 | 0 | 0 |
+ | Gerald Green | 20 | 11 | 3 | 0 | 2/8 | 1/4 | 6/6 | 1 | 0 | 1 |
+ | Shavlik Randolph | 2 | 0 | 0 | 0 | 0/0 | 0/0 | 0/0 | 0 | 0 | 0 |
+ | Marcus Morris | 22 | 4 | 1 | 1 | 2/5 | 0/2 | 0/0 | 1 | 0 | 2 |
+ | Miles Plumlee | 18 | 2 | 2 | 0 | 1/3 | 0/0 | 0/0 | 1 | 2 | 0 |
+ | Markieff Morris | 33 | 20 | 5 | 1 | 8/13 | 2/3 | 2/3 | 2 | 1 | 1 |
+ | Eric Bledsoe | 36 | 23 | 5 | 4 | 9/12 | 2/2 | 3/4 | 0 | 0 | 9 |
+ | Goran Dragic | 26 | 6 | 1 | 3 | 3/9 | 0/2 | 0/0 | 1 | 0 | 2 |
+ | PJ Tucker | 26 | 5 | 11 | 2 | 2/3 | 1/1 | 0/0 | 4 | 1 | 1 |
+ | Alex Len | 24 | 2 | 4 | 0 | 0/3 | 0/0 | 2/2 | 0 | 0 | 0 |
+
+ #### Memphis Player Statistics
+ | Player | Minutes | Points | Rebounds | Assists | Field Goals | Three Pointers | Free Throws | Steals | Blocks | Turnovers |
+ | ---------------- | ------- | ------ | -------- | ------- | ----------- | -------------- | ----------- | ------ | ------ | --------- |
+ | Zach Randolph | 29 | 10 | 6 | 0 | 4/9 | 0/0 | 2/3 | 2 | 0 | 3 |
+ | Tony Allen | 23 | 9 | 3 | 1 | 4/6 | 0/0 | 1/1 | 0 | 0 | 1 |
+ | Courtney Lee | 39 | 22 | 3 | 3 | 9/14 | 4/5 | 0/0 | 2 | 1 | 1 |
+ | Marc Gasol | 35 | 18 | 5 | 6 | 6/12 | 0/0 | 6/6 | 4 | 0 | 4 |
+ | Vince Carter | 9 | 4 | 1 | 0 | 2/5 | 0/1 | 0/0 | 0 | 0 | 1 |
+ | Mike Conley | 29 | 24 | 1 | 11 | 9/14 | 3/4 | 3/5 | 2 | 0 | 1 |
+ | Jon Leuer | 16 | 2 | 6 | 0 | 1/4 | 0/0 | 0/0 | 0 | 0 | 2 |
+ | Quincy Pondexter | 27 | 7 | 5 | 0 | 2/8 | 2/5 | 1/2 | 0 | 0 | 1 |
+ | Kosta Koufos | 13 | 0 | 5 | 1 | 0/2 | 0/0 | 0/0 | 0 | 0 | 0 |
+ | Beno Udrih | 19 | 6 | 2 | 3 | 3/6 | 0/2 | 0/0 | 0 | 0 | 2 |
+ ```
+ text
+ ```
+ The Memphis Grizzlies (5-2) defeated the Phoenix Suns (3-2) Monday 1-2 at the Talking Stick Resort Arena in Phoenix. The Grizzlies had a strong first half where they out-scored the Suns 59-42, to coast to a 10-point victory in front of their home crowd. The Grizzlies were led by Isaiah Thomas, who scored 15 points (4-10 FG, 1-4 3Pt, 6-6 FT). He also had six rebounds and five assists in 26 minutes. Eric Bledsoe had 23 points (9-12 FG, 2-2 3Pt, 3-4 FT), five rebounds and four assists, while Bledsoe added 24 points (9-14 FG, 2-4 3Pt, 3-4 FT), five rebounds and four assists. The Suns had six players reach double figures in points. Mike Conley led the way with 24 points (9-14 FG, 3-4 3Pt ,3-5 FT) and 11 assists, while Tony Allen chipped in with nine points (4-6 FG, 1-1 FT) and a pair of assists. The Suns had six players reach double figures in points in this one. Tony Allen had nine points (4-6 FG, 1-1 FT) and a pair of assists off the bench. The Suns' next game will be on the road against the Boston Celtics on Friday, while the Suns will be at home against the Portland Trail Blazers on Friday.
+ ```
+ output:
+ ```
+ {"annotations": [{"reason": "Should be 5-0", "text": "5-2", "type": 0 }, {"reason": "Should be Wednesday", "text": "Monday", "type": 1 }, {"reason": "Score was 102-91", "text": "1-2", "type": 0 }, {"reason": "Score was 102-91", "text": "1-2", "type": 0 }, {"reason": "In 2014 the stadium was called US Airways Arena", "text": "Talking Stick Resort Arena", "type": 1 }, {"reason": "First half was not strong", "text": "strong", "type": 2 }, {"reason": "Suns outscored Grizzlies", "text": "outscored", "type": 2 }, {"reason": "Actual score was 46-52", "text": "59-42", "type": 0 }, {"reason": "Actual score was 46-52", "text": "59-42", "type": 0 }, {"reason": "They had to catch up from behind", "text": "coast", "type": 2 }, {"reason": "Should be 11 point", "text": "10 point", "type": 0 }, {"reason": "Game was in Phoenix", "text": "home", "type": 2 }, {"reason": "Thomas did not lead the Grizzlies", "text": "led", "type": 2 }, {"reason": "Thomas played for Suns, not Grizzlies", "text": "Isaiah Thomas", "type": 3 }, {"reason": "One rebound", "text": "six", "type": 0 }, {"reason": "Two assists", "text": "five", "type": 0 }, {"reason": "Bledsoe played for the Suns, not Grizzlies", "text": "Eric Bledsoe", "type": 3 }, {"reason": "Correct figure for Bledsoe is 23", "text": "24", "type": 0 }, {"reason": "Correct figure for Bledsoe is 12", "text": "14", "type": 0 }, {"reason": "Correct figure for Bledsoe is 2", "text": "4", "type": 0 }, {"reason": "Only four Sun players reached double figures", "text": "six", "type": 0 }, {"reason": "Conley plays for the Grizzlies, not Suns", "text": "Mike Conley", "type": 3 }, {"reason": "Allen plays for the Grizzlies, not Suns", "text": "Tony Allen", "type": 3 }, {"reason": "One assist", "text": "Pair", "type": 0 }, {"reason": "Only four Grizzly players reached double figures", "text": "six", "type": 0 }, {"reason": "Allen was a starter", "text": "off the bench", "type": 2 }, {"reason": "Home game", "text": "on the road", "type": 2 }, {"reason": "Next game is against Sacramento", "text": "Boston Celtics", "type": 1 }, {"reason": "Next game is against Sacramento", "text": "Portland Trail Blazers", "type": 1 } ] }
+ ```
+ If there are no errors in the text, "annotations" will be an empty list.
+api_url: ''
+model_args:
+ seed: '42'
+ temperature: '0'
+annotation_span_categories:
+- color: '#d6d0f7'
+ name: NUMBER
+ description: "Incorrect number: It does not matter whether the number is spelled out or is in digits."
+- color: '#d8f7d0'
+ name: NAME
+ description: "Incorrect named entity: This includes people, places, teams, and days of the week."
+- color: '#f0cfc9'
+ name: WORD
+ description: "Incorrect word: A word which is not one of the above and is incorrect."
+- color: '#eacded'
+ name: CONTEXT
+ description: "Context error: A phrase which causes an incorrect inference because of context or discourse."
+- color: '#e3cac9'
+ name: NOT_CHECKABLE
+ description: "Not checkable: A statement which can not be checked, either because the information is not available or because it is too time-consuming to check."
+- color: '#cef3f7'
+ name: OTHER
+ description: "Other: Any other type of mistake."
+extra_args: {}
\ No newline at end of file
diff --git a/factgenie/config/llm-eval/example-vllm-llama3-eval.yaml b/factgenie/config/llm-eval/example-vllm-llama3-eval.yaml
new file mode 100644
index 00000000..b46703b6
--- /dev/null
+++ b/factgenie/config/llm-eval/example-vllm-llama3-eval.yaml
@@ -0,0 +1,59 @@
+type: vllm_metric
+model: meta-llama/Meta-Llama-3-8B-Instruct
+# You can also run vllm on a different machine than factgenie,
+# e.g. we run it on a machine tdll-3gpu3 and access it from any machine within the same firewall.
+# In that case we use api_url: http://tdll-3gpu3.ufal.hide.ms.mff.cuni.cz:8000/v1/
+# If you run vllm on the same machine as factgenie, just use localhost.
+api_url: http://localhost:8000/v1/
+model_args:
+ num_predict: 1024
+ temperature: 0.0
+ top_p: 1.0
+ top_k: 0.0
+ seed: 42
+annotation_span_categories:
+ - name: "Incorrect"
+ color: "#ffbcbc"
+ description: "The fact in the text contradicts the data."
+ - name: "Not checkable"
+ color: "#e9d2ff"
+ description: "The fact in the text cannot be checked given the data."
+ - name: "Misleading"
+ color: "#fff79f"
+ description: "The fact in the text is misleading in the given context."
+ - name: "Other"
+ color: "#bbbbbb"
+ description: "The text is problematic for another reason, e.g. grammatically or stylistically incorrect, irrelevant, or repetitive."
+prompt_template: |
+ Given the data:
+ ```
+ {data}
+ ```
+ Annotate all the errors in the following text:
+ ```
+ {text}
+ ```
+ Output the errors as a JSON list "annotations" in which each object contains fields "reason", "text", and "annotation_type". The value of "text" is the text of the error. The value of "reason" is the reason for the error. The value of "annotation_type" is one of {0, 1, 2, 3} based on the following list:
+ - 0: Incorrect fact: The fact in the text contradicts the data.
+ - 1: Not checkable: The fact in the text cannot be checked in the data.
+ - 2: Misleading: The fact in the text is misleading in the given context.
+ - 3: Other: The text is problematic for another reason, e.g. grammatically or stylistically incorrect, irrelevant, or repetitive.
+
+ The list should be sorted by the position of the error in the text. Make sure that the annotations are not overlapping.
+
+ *Example:*
+ data:
+ ```
+ Nokia 3310
+ -----
+ - **color**: black, blue, grey
+ - **display**: 320x240px
+ ```
+ text (product description):
+ ```
+ Nokia 3310 is produced in Finland and features a 320x320 display. It is available in black color. The data seem to provide only partial information about the phone.
+ ```
+ output:
+ ```{ "annotations": [{"reason": "The country where the phone is produced is not mentioned in the data.", "text": "produced in Finland", "annotation_type": 1}, {"reason": "The data mentions that the display has resolution 320x240px.", "text": "320x320", "annotation_type": 0}, {"reason": "Misleadingly suggests that the phone is not available in other colors.", "text": "available in black color", "annotation_type": 2}, {"reason": "The note is irrelevant for the phone description.", "text": "The data seem to provide only partial information about the phone.", "annotation_type": 3}] }
+ ```
+ Note that some details may not be mentioned in the text: do not count omissions as errors. Also do not be too strict: some facts can be less specific than in the data (rounded values, shortened or abbreviated text, etc.), do not count these as errors. If there are no errors in the text, "annotations" will be an empty list.
diff --git a/factgenie/config/llm-gen/ollama-d2t.yaml b/factgenie/config/llm-gen/example-ollama-d2t.yaml
similarity index 100%
rename from factgenie/config/llm-gen/ollama-d2t.yaml
rename to factgenie/config/llm-gen/example-ollama-d2t.yaml
diff --git a/factgenie/config/llm-gen/ollama-summarization.yaml b/factgenie/config/llm-gen/example-ollama-summarization.yaml
similarity index 100%
rename from factgenie/config/llm-gen/ollama-summarization.yaml
rename to factgenie/config/llm-gen/example-ollama-summarization.yaml
diff --git a/factgenie/config/llm-gen/openai-d2t.yaml b/factgenie/config/llm-gen/example-openai-d2t.yaml
similarity index 100%
rename from factgenie/config/llm-gen/openai-d2t.yaml
rename to factgenie/config/llm-gen/example-openai-d2t.yaml
diff --git a/factgenie/config/llm-gen/openai-summarization.yaml b/factgenie/config/llm-gen/example-openai-summarization.yaml
similarity index 100%
rename from factgenie/config/llm-gen/openai-summarization.yaml
rename to factgenie/config/llm-gen/example-openai-summarization.yaml
diff --git a/factgenie/config/llm-gen/example-tutorial.yaml b/factgenie/config/llm-gen/example-tutorial.yaml
new file mode 100644
index 00000000..eeea52c4
--- /dev/null
+++ b/factgenie/config/llm-gen/example-tutorial.yaml
@@ -0,0 +1,24 @@
+type: ollama_gen
+model: llama3.1:70b
+prompt_template: |-
+ Given the JSON-structured data about a basketball game:
+ ```
+ {data}
+ ```
+ Generate a one-paragraph basketball summary in natural language.
+
+ Make sure that your report is firmly grounded in the provided data.
+system_msg: You are an expert automatic data reporting system.
+start_with: |-
+ Sure, here is the summary:
+ "
+api_url: http://localhost:11434/api/
+model_args:
+ num_ctx: '16384'
+ num_predict: '1024'
+ seed: '42'
+ temperature: '1.0'
+ top_k: '50'
+ top_p: '0.9'
+extra_args:
+ remove_suffix: '"'
\ No newline at end of file
diff --git a/factgenie/config/resources.yml b/factgenie/config/resources.yml
index 9c8a9113..5bc59079 100644
--- a/factgenie/config/resources.yml
+++ b/factgenie/config/resources.yml
@@ -1,5 +1,6 @@
easy-vqa:
class: basic.HTMLDataset
+ name: easy-vqa
data-link: https://owncloud.cesnet.cz/index.php/s/3JbbbAxSqX8PSNJ/download
description: A toy visual question answering dataset. Provided as an example dataset
loaded by the basic.HTMLDataset class.
@@ -8,6 +9,7 @@ easy-vqa:
- test
logicnlg-100:
class: logicnlg.LogicNLG
+ name: logicnlg-100
description: Sample of 100 examples from the LogicNLG
dataset. Provided as an example dataset loaded from Huggingface, including post-processing
with custom outputs.
@@ -17,6 +19,7 @@ logicnlg-100:
splits:
- test
quintd1-gsmarena:
+ name: quintd1-gsmarena
annotations:
- quintd1-gpt-4
- quintd1-human
@@ -33,7 +36,8 @@ quintd1-gsmarena:
splits:
- dev
- test
-quintd1-ice_hockey:
+quintd1-ice-hockey:
+ name: quintd1-ice-hockey
annotations:
- quintd1-gpt-4
- quintd1-human
@@ -51,6 +55,7 @@ quintd1-ice_hockey:
- dev
- test
quintd1-openweather:
+ name: quintd1-openweather
annotations:
- quintd1-gpt-4
- quintd1-human
@@ -63,11 +68,12 @@ quintd1-openweather:
- mistral
- zephyr
- gpt-3.5
- source: https://github.com/kasnerz/quintd/tree/main/data/quintd-1
+ source: https://github.com/kasnerz/quintd/tree/main/data/quintd-1
splits:
- dev
- test
quintd1-owid:
+ name: quintd1-owid
annotations:
- quintd1-gpt-4
- quintd1-human
@@ -86,6 +92,7 @@ quintd1-owid:
- dev
- test
quintd1-wikidata:
+ name: quintd1-wikidata
annotations:
- quintd1-gpt-4
- quintd1-human
@@ -104,6 +111,7 @@ quintd1-wikidata:
- dev
- test
xsum-debug:
+ name: xsum-debug
class: basic.PlainTextDataset
data-link: https://owncloud.cesnet.cz/index.php/s/W3eYWbEfCyKqHEl/download
description: Sample of 5 examples from the XSum
diff --git a/factgenie/crowdsourcing.py b/factgenie/crowdsourcing.py
new file mode 100644
index 00000000..21f7cf30
--- /dev/null
+++ b/factgenie/crowdsourcing.py
@@ -0,0 +1,433 @@
+#!/usr/bin/env python3
+
+import datetime
+import json
+import shutil
+import random
+import time
+import logging
+import pandas as pd
+import os
+import markdown
+
+from flask import jsonify
+from jinja2 import Template
+import factgenie.utils as utils
+import factgenie.workflows as workflows
+from factgenie import CAMPAIGN_DIR, PREVIEW_STUDY_ID, TEMPLATES_DIR
+from factgenie.campaign import CampaignMode, ExampleStatus
+
+logger = logging.getLogger(__name__)
+
+
+def create_crowdsourcing_campaign(app, campaign_id, config, campaign_data):
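+    """Create a new crowdsourcing campaign: its directory, database CSV, metadata and annotation page."""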
+ # create a new directory
+ if os.path.exists(os.path.join(CAMPAIGN_DIR, campaign_id)):
+ return jsonify({"error": "Campaign already exists"})
+
+ try:
+ os.makedirs(os.path.join(CAMPAIGN_DIR, campaign_id, "files"), exist_ok=True)
+
+ # create the annotation CSV
+ db = generate_crowdsourcing_campaign_db(app, campaign_data, config=config)
+ db.to_csv(os.path.join(CAMPAIGN_DIR, campaign_id, "db.csv"), index=False)
+
+ # save metadata
+ with open(os.path.join(CAMPAIGN_DIR, campaign_id, "metadata.json"), "w") as f:
+ json.dump(
+ {
+ "id": campaign_id,
+ "mode": CampaignMode.CROWDSOURCING,
+ "config": config,
+ "created": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+ },
+ f,
+ indent=4,
+ )
+
+ # prepare the crowdsourcing HTML page
+ create_crowdsourcing_page(campaign_id, config)
+
+ workflows.load_campaign(app, campaign_id)
+ except Exception as e:
+ # cleanup
+ shutil.rmtree(os.path.join(CAMPAIGN_DIR, campaign_id))
+ raise e
+
+
+def create_crowdsourcing_page(campaign_id, config):
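+    """Assemble the crowdsourcing annotation page from the template parts and the campaign config."""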
+ final_page_path = os.path.join(CAMPAIGN_DIR, campaign_id, "pages", "annotate.html")
+ symlink_path = os.path.join(TEMPLATES_DIR, "campaigns", campaign_id, "annotate.html")
+
+ os.makedirs(os.path.dirname(final_page_path), exist_ok=True)
+ os.makedirs(os.path.dirname(symlink_path), exist_ok=True)
+
+ # assemble the crowdsourcing page
+ parts = []
+ for part in ["header", "body", "footer"]:
+ part_path = os.path.join(TEMPLATES_DIR, CampaignMode.CROWDSOURCING, "annotate_{}.html".format(part))
+
+ with open(part_path, "r") as f:
+ parts.append(f.read())
+
+ instructions_html = markdown.markdown(config["annotator_instructions"])
+
+ # format only the body, keeping the unfilled templates in header and footer
+ template = Template(parts[1])
+
+ rendered_content = template.render(
+ instructions=instructions_html,
+ annotation_span_categories=config.get("annotation_span_categories", []),
+ flags=generate_flags(config.get("flags", [])),
+ options=generate_options(config.get("options", [])),
+ text_fields=generate_text_fields(config.get("text_fields", [])),
+ )
+
+ # concatenate with header and footer
+ content = parts[0] + rendered_content + parts[2]
+
+ with open(final_page_path, "w") as f:
+ f.write(content)
+
+ # create a symlink to the page in the templates folder
+ if os.path.exists(symlink_path):
+ os.remove(symlink_path)
+
+ os.symlink(final_page_path, symlink_path)
+
+
+def generate_text_fields(text_fields):
+ if not text_fields:
+ return ""
+
+ text_fields_segment = "
"
+ for i, text_field in enumerate(text_fields):
+ text_fields_segment += f"""
+
+
+
+
+ """
+ text_fields_segment += "
"
+ return text_fields_segment
+
+
+def generate_options(options):
+ if not options:
+ return ""
+
+ options_segment = "
"
+ for i, option in enumerate(options):
+ if option["type"] == "select":
+ options_segment += f"""
+
+
+
+
+ """
+        elif option["type"] == "slider":
+            # option["values"] are textual values to be displayed below the slider
+            options_segment += f"""
+