diff --git a/parley/templates/evaluation/detail.html b/parley/templates/evaluation/detail.html index 726c7bf..56bd2e7 100644 --- a/parley/templates/evaluation/detail.html +++ b/parley/templates/evaluation/detail.html @@ -40,10 +40,10 @@ - + - +
@@ -67,7 +67,7 @@

Models under Evaluation

- {{ model.model.name }} v{{ model.model.version }} + {{ model.model.name }} v{{ model.model.version }}

@@ -132,5 +132,113 @@

- + + +
+
+
+
+

Metrics

+
+
+ +
+
+ + + + + + + + + + + {% for eval in evaluation.model_evaluations.all %} + {% if eval.metrics_cached %} + {% if eval.valid_output_processed %} + + + + + + + {% endif %} + {% if eval.sensitive_processed %} + + + + + + + {% endif %} + {% if eval.similarity_processed %} + + + + + + + {% endif %} + {% if eval.labels_processed %} + + + + + + + {% endif %} + {% if eval.confabulations_processed %} + + + + + + + {% endif %} + {% if eval.readability_processed %} + + + + + + + {% endif %} + {% endif %} + {% empty %} + + + + {% endfor %} + +
EvaluationMetricPositiveNegative
{{ eval.model.name }}Valid Output{{ eval.n_valid_output_type }} ({{ eval.percent_valid_output_type }}%){{ eval.n_invalid_output_type }} ({{ eval.percent_invalid_output_type }}%)
{{ eval.model.name }}Leaks Sensitive{{ eval.n_leaks_sensitive }} ({{ eval.percent_leaks_sensitive }}%){{ eval.n_no_sensitive_leaks }} ({{ eval.percent_no_sensitive_leaks }}%)
{{ eval.model.name }}Similarity to Expected{{ eval.n_similar }} ({{ eval.percent_similar }}%){{ eval.n_not_similar }} ({{ eval.percent_not_similar }}%)
{{ eval.model.name }}Labeled Correctly{{ eval.n_labeled_correctly }} ({{ eval.percent_labeled_correctly }}%){{ eval.n_labeled_incorrectly }} ({{ eval.percent_labeled_incorrectly }}%)
{{ eval.model.name }}Confabulations{{ eval.n_confabulations }} ({{ eval.percent_confabulations }}%){{ eval.n_not_confabulation }} ({{ eval.percent_not_confabulation }}%)
{{ eval.model.name }}Readable Output{{ eval.n_readable }} ({{ eval.percent_readable }}%){{ eval.n_not_readable }} ({{ eval.percent_not_readable }}%)
No Evaluations Run
+
+
+
+
+{% endblock %} + +{% block javascripts %} + {{ block.super }} + {% endblock %} \ No newline at end of file diff --git a/parley/templates/llm/detail.html b/parley/templates/llm/detail.html index 81e202f..b4cd6f3 100644 --- a/parley/templates/llm/detail.html +++ b/parley/templates/llm/detail.html @@ -9,6 +9,10 @@ {% endblock %} {% block page %} +
+ +
+
@@ -90,4 +94,27 @@

Metrics

+{% endblock %} + +{% block javascripts %} + {{ block.super }} + {% endblock %} \ No newline at end of file diff --git a/parley/templates/llm/list.html b/parley/templates/llm/list.html index 7bca56a..05656d7 100644 --- a/parley/templates/llm/list.html +++ b/parley/templates/llm/list.html @@ -1,4 +1,14 @@ {% extends 'page.html' %} {% block page-pretitle %}Overview{% endblock %} -{% block page-title %}Model Under Evaluation{% endblock %} \ No newline at end of file +{% block page-title %}Model Under Evaluation{% endblock %} + +{% block page %} + +{% endblock %} \ No newline at end of file diff --git a/parley/templates/reviews/detail.html b/parley/templates/reviews/detail.html index 4dd25ea..e768d1a 100644 --- a/parley/templates/reviews/detail.html +++ b/parley/templates/reviews/detail.html @@ -78,7 +78,7 @@

- Prompt {{ prompt.order }} + {{ prompt }}