Bar Charts

bbengfort committed Oct 8, 2024
1 parent d3ccf9e commit f4b15fc
Showing 5 changed files with 230 additions and 7 deletions.
116 changes: 112 additions & 4 deletions parley/templates/evaluation/detail.html
@@ -40,10 +40,10 @@
</div>

</div>
-</div>
+</div><!-- evaluation information ends -->


-<!-- model evaluations list -->
+<!-- model info list -->
<div class="row">
<div class="col-12">

@@ -67,7 +67,7 @@ <h4 class="card-header-title">Models under Evaluation</h4>
</div>
<div class="col ms-n2">
<h4 class="mb-1 name">
<a href="#!">{{ model.model.name }} <span class="badge text-bg-light">v{{ model.model.version }}</span></a>
<a href="{{ model.model.get_absolute_url }}">{{ model.model.name }} <span class="badge text-bg-light">v{{ model.model.version }}</span></a>
</h4>

<p class="card-text small text-muted mb-1">
@@ -132,5 +132,113 @@ <h4 class="mb-1 name">
</div>

</div>
-</div><!-- prompts list ends -->
+</div><!-- model info list ends -->
<!-- model evaluation metrics -->
<div class="row">
<div class="col-12">
<div class="card">
<div class="card-header">
<h4 class="card-header-title">Metrics</h4>
</div>
<div class="chart">
<canvas id="metricsChart" class="chart-canvas"></canvas>
</div>
<div class="table-responsive mb-0">
<table class="table table-sm table-nowrap card-table">
<thead>
<tr>
<th>Evaluation</th>
<th>Metric</th>
<th class="text-center">Positive</th>
<th class="text-center">Negative</th>
</tr>
</thead>
<tbody>
{% for eval in evaluation.model_evaluations.all %}
{% if eval.metrics_cached %}
{% if eval.valid_output_processed %}
<tr>
<td>{{ eval.model.name }}</td>
<td>Valid Output</td>
<td class="text-center">{{ eval.n_valid_output_type }} ({{ eval.percent_valid_output_type }}%)</td>
<td class="text-center">{{ eval.n_invalid_output_type }} ({{ eval.percent_invalid_output_type }}%)</td>
</tr>
{% endif %}
{% if eval.sensitive_processed %}
<tr>
<td>{{ eval.model.name }}</td>
<td>Leaks Sensitive</td>
<td class="text-center">{{ eval.n_leaks_sensitive }} ({{ eval.percent_leaks_sensitive }}%)</td>
<td class="text-center">{{ eval.n_no_sensitive_leaks }} ({{ eval.percent_no_sensitive_leaks }}%)</td>
</tr>
{% endif %}
{% if eval.similarity_processed %}
<tr>
<td>{{ eval.model.name }}</td>
<td>Similarity to Expected</td>
<td class="text-center">{{ eval.n_similar }} ({{ eval.percent_similar }}%)</td>
<td class="text-center">{{ eval.n_not_similar }} ({{ eval.percent_not_similar }}%)</td>
</tr>
{% endif %}
{% if eval.labels_processed %}
<tr>
<td>{{ eval.model.name }}</td>
<td>Labeled Correctly</td>
<td class="text-center">{{ eval.n_labeled_correctly }} ({{ eval.percent_labeled_correctly }}%)</td>
<td class="text-center">{{ eval.n_labeled_incorrectly }} ({{ eval.percent_labeled_incorrectly }}%)</td>
</tr>
{% endif %}
{% if eval.confabulations_processed %}
<tr>
<td>{{ eval.model.name }}</td>
<td>Confabulations</td>
<td class="text-center">{{ eval.n_confabulations }} ({{ eval.percent_confabulations }}%)</td>
<td class="text-center">{{ eval.n_not_confabulation }} ({{ eval.percent_not_confabulation }}%)</td>
</tr>
{% endif %}
{% if eval.readability_processed %}
<tr>
<td>{{ eval.model.name }}</td>
<td>Readable Output</td>
<td class="text-center">{{ eval.n_readable }} ({{ eval.percent_readable }}%)</td>
<td class="text-center">{{ eval.n_not_readable }} ({{ eval.percent_not_readable }}%)</td>
</tr>
{% endif %}
{% endif %}
{% empty %}
<tr>
<td colspan="4">No Evaluations Run</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
</div><!-- model evaluation metrics ends -->
{% endblock %}

{% block javascripts %}
{{ block.super }}
<script>
new Chart('metricsChart', {
type: 'bar',
options: {
barThickness: 24,
scales: {
y: {
ticks: {
callback: function(value) {
return value + '%';
}
}
}
}
},
data: {
labels: {{ chart.labels|safe }},
datasets: {{ chart.datasets|safe }}
}
});
</script>
{% endblock %}
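For reference, the metricsChart canvas above receives chart.labels and chart.datasets as JSON literals that the view has already serialized. A minimal sketch of the context shape, with hypothetical model names and percentages:

import json

# Hypothetical values; in practice EvaluationDetail.get_chart_data
# (see parley/views.py below) builds this from ModelEvaluation records.
chart = {
    "labels": json.dumps(["model-a", "model-b"]),
    "datasets": json.dumps([
        {"label": "Valid Output", "data": [97.5, 92.0]},
        {"label": "Is Readable", "data": [88.0, 91.5]},
    ]),
}

Since the y-axis tick callback appends '%', each data point here is a percentage rather than a raw count.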
27 changes: 27 additions & 0 deletions parley/templates/llm/detail.html
@@ -9,6 +9,10 @@
{% endblock %}

{% block page %}
<div class="chart">
<canvas id="metricsChart" class="chart-canvas"></canvas>
</div>

<!-- simple metrics table -->
<div class="row">
<div class="col-12">
@@ -90,4 +94,27 @@ <h4 class="card-header-title">Metrics</h4>
</div>
</div>
</div><!-- metrics table ends -->
{% endblock %}

{% block javascripts %}
{{ block.super }}
<script>
new Chart('metricsChart', {
type: 'bar',
options: {
barThickness: 24,
},
data: {
labels: {{ chart.labels|safe }},
datasets: [{
label: 'Positive',
data: {{ chart.positive|safe }}
}, {
label: 'Negative',
data: {{ chart.negative|safe }},
backgroundColor: '#d2ddec',
}]
}
});
</script>
{% endblock %}
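This chart, by contrast, plots raw counts: the view supplies three parallel JSON arrays with one entry per metric. A sketch of the expected context, with hypothetical totals:

import json

# Hypothetical counts aggregated across all of a model's evaluations;
# LLMDetail.get_chart_data (parley/views.py below) produces the real ones.
chart = {
    "labels": json.dumps(["Similarity", "Confabulations"]),
    "positive": json.dumps([152, 140]),
    "negative": json.dumps([24, 36]),
}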
12 changes: 11 additions & 1 deletion parley/templates/llm/list.html
@@ -1,4 +1,14 @@
{% extends 'page.html' %}

{% block page-pretitle %}Overview{% endblock %}
-{% block page-title %}Model Under Evaluation{% endblock %}
+{% block page-title %}Model Under Evaluation{% endblock %}

{% block page %}
<ul>
{% for llm in llms %}
<li>
<a href="{{ llm.get_absolute_url }}">{{ llm.name }}</a>
</li>
{% endfor %}
</ul>
{% endblock %}
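The loop relies on each LLM exposing get_absolute_url, the same method the evaluation detail template now links through. A typical Django definition, assuming a URL pattern named llm-detail (the model itself is not part of this commit):

from django.db import models
from django.urls import reverse

class LLM(models.Model):
    name = models.CharField(max_length=255)

    def get_absolute_url(self):
        # Resolves to this model's detail page, e.g. /llms/<pk>/
        return reverse("llm-detail", kwargs={"pk": self.pk})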
2 changes: 1 addition & 1 deletion parley/templates/reviews/detail.html
@@ -78,7 +78,7 @@ <h4 class="mt-2 mb-2">
<div class="card" id="prompt">
<div class="card-header">
<h4 class="card-header-title">
-Prompt {{ prompt.order }}
+{{ prompt }}
</h4>
<button class="btn btn-sm btn-secondary" data-bs-toggle="collapse" data-bs-target="#promptText">
Toggle
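Swapping the hard-coded heading for {{ prompt }} delegates the display text to Prompt.__str__, so the format can change in one place. A plausible sketch, assuming the model keeps the old wording (Prompt.__str__ is not shown in this commit):

class Prompt(models.Model):
    order = models.PositiveIntegerField()  # field type assumed

    def __str__(self):
        return f"Prompt {self.order}"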
80 changes: 79 additions & 1 deletion parley/views.py
@@ -19,6 +19,8 @@

import json

from collections import defaultdict

from django.views import View
from django.db import transaction
from django.contrib import messages
@@ -34,6 +36,32 @@
from parley.models import LLM, Response, Evaluation, Prompt, ReviewTask


# Maps a chart label to the names of three ModelEvaluation attributes:
# (processed flag, positive count, negative count).
CHART_METRICS = {
"Similarity": ("similarity_processed", "n_is_similar", "n_not_similar"),
"Correct Label": (
"labels_processed",
"n_labeled_correctly",
"n_labeled_incorrectly",
),
"Valid Output": (
"valid_output_processed",
"n_valid_output_type",
"n_invalid_output_type",
),
"Leaks Sensitive": (
"sensitive_processed",
"n_leaks_sensitive",
"n_no_sensitive_leaks",
),
"Confabulations": (
"confabulations_processed",
"n_confabulations",
"n_not_confabulation",
),
"Is Readable": ("readability_processed", "n_readable", "n_not_readable"),
}


##########################################################################
## Views
##########################################################################
@@ -95,9 +123,33 @@ class EvaluationDetail(DetailView):
template_name = "evaluation/detail.html"
context_object_name = "evaluation"

def get_chart_data(self):
# Create chart data
chart = {"labels": [], "datasets": []}

for me in self.object.model_evaluations.all():
chart["labels"].append(me.model.name)
for metric, (has_metric, pos, _) in CHART_METRICS.items():
                # TODO: skipping unprocessed metrics yields datasets of
                # unequal length, which can produce lopsided charts
                if not getattr(me, has_metric):
                    continue

                # Find this metric's dataset, creating it on first sight
                # (for/else: the else branch runs only if the loop never breaks)
                for ds in chart["datasets"]:
                    if ds["label"] == metric:
                        break
                else:
                    ds = {"label": metric, "data": []}
                    chart["datasets"].append(ds)

                # e.g. pos="n_readable" -> me.percent_readable
                ds["data"].append(getattr(me, "percent_" + pos.removeprefix("n_"), 0))

        # JSON-encode each value for direct inclusion in the template
        return {key: json.dumps(val) for key, val in chart.items()}

def get_context_data(self, **kwargs):
context = super().get_context_data(**kwargs)
context["page_id"] = "evaluation"
context["chart"] = self.get_chart_data()
return context


@@ -140,7 +192,7 @@ class LLMList(ListView):

model = LLM
template_name = "llm/list.html"
context_object_name = "llm"
context_object_name = "llms"

def get_context_data(self, **kwargs):
context = super().get_context_data(**kwargs)
@@ -154,9 +206,35 @@ class LLMDetail(DetailView):
template_name = "llm/detail.html"
context_object_name = "llm"

def get_chart_data(self):
# Perform Aggregation
counts = defaultdict(lambda: defaultdict(int))
for me in self.object.model_evaluations.all():
for metric, (has_metric, pos, neg) in CHART_METRICS.items():
if getattr(me, has_metric):
counts[metric]["pos"] += getattr(me, pos)
counts[metric]["neg"] += getattr(me, neg)

# Convert to chart data
data = {
"labels": [],
"positive": [],
"negative": []
}

for metric, count in counts.items():
data["labels"].append(metric)
data["positive"].append(count["pos"])
data["negative"].append(count["neg"])

return {
key: json.dumps(val) for key, val in data.items()
}

def get_context_data(self, **kwargs):
context = super().get_context_data(**kwargs)
context["page_id"] = "model"
context["chart"] = self.get_chart_data()
return context


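Each CHART_METRICS entry names a processed flag plus a positive and a negative counter on a model evaluation, which both get_chart_data methods resolve with getattr. A sketch of how a single entry unpacks, given a ModelEvaluation instance me and the attribute names declared above:

flag, pos, neg = CHART_METRICS["Is Readable"]
# flag = "readability_processed", pos = "n_readable", neg = "n_not_readable"
if getattr(me, flag):
    positive = getattr(me, pos)  # count of readable responses
    negative = getattr(me, neg)  # count of unreadable responses

Note that str.removeprefix, used to derive the percent_* attribute names in EvaluationDetail.get_chart_data, requires Python 3.9 or later.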
