From c20cf4bde1fcc69f5fd257c022503dbbdef8cbd9 Mon Sep 17 00:00:00 2001 From: Benjamin Bengfort Date: Sun, 6 Oct 2024 21:38:25 -0500 Subject: [PATCH 1/3] Review Task Views --- parlance/urls.py | 11 +++-- parlance/views.py | 6 ++- parley/migrations/0001_initial.py | 4 +- parley/models/llm.py | 3 ++ parley/models/user.py | 23 ++++++++++- parley/templates/evaluation/detail.html | 4 ++ parley/templates/reviews/detail.html | 4 ++ parley/views.py | 14 ++++++- templates/site/dashboard.html | 54 +++++++++++++++++-------- 9 files changed, 98 insertions(+), 25 deletions(-) create mode 100644 parley/templates/reviews/detail.html diff --git a/parlance/urls.py b/parlance/urls.py index c7b8672..390bb28 100644 --- a/parlance/urls.py +++ b/parlance/urls.py @@ -33,9 +33,10 @@ from django.contrib import admin from django.urls import path, include +from parley.views import ReviewTaskDetail from parley.views import LLMList, LLMDetail -from parley.views import UploaderFormView, CreateReviewTask, ResponseDetail from parlance.views import Dashboard, AccountSettings, AccountProfile +from parley.views import UploaderFormView, CreateReviewTask, ResponseDetail from parley.views import EvaluationList, EvaluationDetail, DownloadPrompts @@ -47,17 +48,21 @@ # Application Pages path("", Dashboard.as_view(), name="dashboard"), path("upload/", UploaderFormView.as_view(), name="upload"), + path("account/profile", AccountProfile.as_view(), name="account-profile"), path("account/settings", AccountSettings.as_view(), name="account-settings"), + path("evaluations/", EvaluationList.as_view(), name="evaluations-list"), path("evaluations/", EvaluationDetail.as_view(), name="evaluation-detail"), path("evaluations//download", DownloadPrompts.as_view(), name="evaluation-download"), + path("evaluations/create-review-task", CreateReviewTask.as_view(), name="create-review-task"), + + path("reviews/", ReviewTaskDetail.as_view(), name="review-task"), + path("models/", LLMList.as_view(), name="llms-list"), 
path("models/", LLMDetail.as_view(), name="llm-detail"), path("responses/", ResponseDetail.as_view(), name="response-detail"), - path("evaluations/create-review-task", CreateReviewTask.as_view(), name="create-review-task"), - # Admin URLs path("admin/", admin.site.urls), diff --git a/parlance/views.py b/parlance/views.py index 2023cbc..7ae34bd 100644 --- a/parlance/views.py +++ b/parlance/views.py @@ -19,7 +19,7 @@ from django.shortcuts import render from django.views.generic import TemplateView -from parley.models import LLM, Evaluation, Prompt, Response +from parley.models import LLM, Evaluation, Prompt, Response, ReviewTask ########################################################################## @@ -41,7 +41,9 @@ def get_context_data(self, **kwargs): context["n_responses"] = Response.objects.count() # Get evaluations and models - context["evaluations"] = Evaluation.objects.filter(active=True) + context["evaluations"] = ReviewTask.objects.filter( + completed_on=None, user=self.request.user + ) context["llms"] = LLM.objects.all()[:20] return context diff --git a/parley/migrations/0001_initial.py b/parley/migrations/0001_initial.py index 5e560fe..885308f 100644 --- a/parley/migrations/0001_initial.py +++ b/parley/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 5.1.1 on 2024-10-07 01:00 +# Generated by Django 5.1.1 on 2024-10-07 01:54 import django.db.models.deletion import parley.models.llm @@ -814,6 +814,7 @@ class Migration(migrations.Migration): ( "started_on", models.DateTimeField( + blank=True, default=None, help_text="The timestamp that the review was start on, null if not started", null=True, @@ -822,6 +823,7 @@ class Migration(migrations.Migration): ( "completed_on", models.DateTimeField( + blank=True, default=None, help_text="The timestamp that the review was completed, null if not completed", null=True, diff --git a/parley/models/llm.py b/parley/models/llm.py index 92ed17b..9ec3a1f 100644 --- a/parley/models/llm.py +++ 
b/parley/models/llm.py @@ -282,6 +282,9 @@ class Meta: def image(self): return self.model.image + def prompts(self): + return self.evaluation.prompts.filter(exclude=False) + def responses(self): return Response.objects.filter( model=self.model, prompt__evaluation=self.evaluation diff --git a/parley/models/user.py b/parley/models/user.py index 6d2eaf9..fb66712 100644 --- a/parley/models/user.py +++ b/parley/models/user.py @@ -20,6 +20,7 @@ from .base import TimestampedModel from django.db import models +from django.urls import reverse ########################################################################## @@ -49,12 +50,12 @@ class ReviewTask(TimestampedModel): ) started_on = models.DateTimeField( - null=True, default=None, + null=True, default=None, blank=True, help_text="The timestamp that the review was start on, null if not started" ) completed_on = models.DateTimeField( - null=True, default=None, + null=True, default=None, blank=True, help_text="The timestamp that the review was completed, null if not completed", ) @@ -64,6 +65,10 @@ class Meta: get_latest_by = "created" unique_together = ("user", "evaluation") + @property + def task(self): + return self.evaluation.evaluation.task + @property def is_started(self): return self.started_on is not None @@ -72,6 +77,20 @@ def is_started(self): def is_completed(self): return self.completed_on is not None + @property + def percent_complete(self): + n_prompts = self.evaluation.prompts().count() + if n_prompts == 0: + return 0 + n_reviews = self.response_reviews.count() + return int((float(n_reviews) / float(n_prompts)) * 100) + + def __str__(self): + return f"{self.evaluation.evaluation.name} ({self.evaluation.model.name})" + + def get_absolute_url(self): + return reverse("review-task", args=(self.id,)) + class ResponseReview(TimestampedModel): diff --git a/parley/templates/evaluation/detail.html b/parley/templates/evaluation/detail.html index 29666a1..f67d6b8 100644 --- a/parley/templates/evaluation/detail.html +++ 
b/parley/templates/evaluation/detail.html @@ -82,9 +82,13 @@

{% get_review_task user model as review_task %} {% if review_task %} + {% if review_task.is_completed %} + Review Complete! + {% else %} Continue Review + {% endif %} {% else %}
diff --git a/parley/templates/reviews/detail.html b/parley/templates/reviews/detail.html new file mode 100644 index 0000000..0a6d509 --- /dev/null +++ b/parley/templates/reviews/detail.html @@ -0,0 +1,4 @@ +{% extends 'page.html' %} + +{% block page-pretitle %}Reviews{% endblock %} +{% block page-title %}{{ review }}{% endblock %} \ No newline at end of file diff --git a/parley/views.py b/parley/views.py index 1015e52..c64ff4a 100644 --- a/parley/views.py +++ b/parley/views.py @@ -31,7 +31,7 @@ from parley.exceptions import ParlanceUploadError from parley.forms import Uploader, CreateReviewForm -from parley.models import LLM, Response, Evaluation, Prompt +from parley.models import LLM, Response, Evaluation, Prompt, ReviewTask ########################################################################## @@ -194,3 +194,15 @@ def form_invalid(self, form): def get(self, *args, **kwargs): return HttpResponseNotAllowed(['POST']) + + +class ReviewTaskDetail(DetailView): + + model = ReviewTask + template_name = "reviews/detail.html" + context_object_name = "review" + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + context["page_id"] = "review" + return context diff --git a/templates/site/dashboard.html b/templates/site/dashboard.html index aaaa34d..a34e865 100644 --- a/templates/site/dashboard.html +++ b/templates/site/dashboard.html @@ -28,7 +28,7 @@

- Active Evaluations + Pending Review Tasks

@@ -39,7 +39,7 @@

- + @@ -49,7 +49,7 @@

- {{ evaluation.name }} + {{ evaluation }}

@@ -57,17 +57,26 @@

{{ evaluation.task }}

- -

- {{ evaluation.prompts.count }} prompts -

+ + {% with pcent=evaluation.percent_complete %} +
+
+
{{ pcent }}%
+
+
+
+
+
+
+
+ {% endwith %}

@@ -79,14 +88,14 @@

@@ -96,7 +105,20 @@

{% empty %}
  • -

    No Active Review Tasks

    +

    + You have no pending review tasks! +

    +

    + To begin a review, go to the + Evaluations page, and select + an evaluation that has at least one model associated with it. When you + reach the evaluation detail page, click on the "Begin Review" button to + start your review. +

    +

    + If all models have been marked as reviewed — congratulations, you + are an expert reviewer! Let's train a new model and get evaluating! +

  • {% endfor %} From 9a65393fd5eecee98442f1209a3e15346906184b Mon Sep 17 00:00:00 2001 From: Benjamin Bengfort Date: Sun, 6 Oct 2024 21:54:37 -0500 Subject: [PATCH 2/3] signals --- parley/models/user.py | 2 +- parley/signals.py | 36 ++++++++++++++++++++++++- parley/templates/evaluation/detail.html | 20 ++++++++++++++ 3 files changed, 56 insertions(+), 2 deletions(-) diff --git a/parley/models/user.py b/parley/models/user.py index fb66712..8e6a06c 100644 --- a/parley/models/user.py +++ b/parley/models/user.py @@ -106,7 +106,7 @@ class ResponseReview(TimestampedModel): 'parley.Response', null=False, on_delete=models.CASCADE, - related_name=("reviews"), + related_name="reviews", ) output_correct = models.BooleanField( diff --git a/parley/signals.py b/parley/signals.py index 8d0ac59..97f7c6a 100644 --- a/parley/signals.py +++ b/parley/signals.py @@ -17,10 +17,11 @@ ## Imports ########################################################################## +from django.utils import timezone from django.dispatch import receiver from django.db.models.signals import post_save, post_delete -from parley.models import Response, ModelEvaluation +from parley.models import Response, ModelEvaluation, ResponseReview ########################################################################## @@ -61,3 +62,36 @@ def model_evaluation_unlink(sender, instance, *args, **kwargs): ModelEvaluation.objects.get(**kwargs).delete() except ModelEvaluation.DoesNotExist: pass + + +########################################################################## +## Ensure ReviewTasks are +########################################################################## + +@receiver(post_save, sender=ResponseReview, dispatch_uid="check_review_task_completion") +def check_review_task_completion(sender, instance, created, *args, **kwargs): + task = instance.review + changed = False + + if not task.started_on: + task.started_on = timezone.localtime() + changed = True + + if not task.completed_on: + n_prompts = 
task.evaluation.prompts().count() + if n_prompts == 0 or task.reviews.count() == n_prompts: + task.completed_on = timezone.localtime() + changed = True + + if changed: + task.save() + + +@receiver(post_delete, sender=ResponseReview, dispatch_uid="check_review_task_unfinished") +def check_review_task_unfinished(sender, instance, created, *args, **kwargs): + task = instance.review + if task.completed_on: + n_prompts = task.evaluation.prompts().count() + if n_prompts > 0 and task.reviews.count() < n_prompts: + task.completed_on = None + task.save() diff --git a/parley/templates/evaluation/detail.html b/parley/templates/evaluation/detail.html index f67d6b8..cf70f24 100644 --- a/parley/templates/evaluation/detail.html +++ b/parley/templates/evaluation/detail.html @@ -100,6 +100,26 @@

    {% endif %}

    +
    + + + + +
    {% empty %} From a5cafba036fe1eddfc0c79c8f78f27ce8d4ce7fb Mon Sep 17 00:00:00 2001 From: Benjamin Bengfort Date: Sun, 6 Oct 2024 22:26:28 -0500 Subject: [PATCH 3/3] fix signals --- parley/forms.py | 8 ++++++-- parley/migrations/0001_initial.py | 8 ++++---- parley/models/llm.py | 2 +- parley/models/user.py | 19 +++++++++++++------ parley/signals.py | 23 ++++++++++++++++++----- parley/templatetags/parlance.py | 2 +- templates/site/dashboard.html | 4 ++-- 7 files changed, 45 insertions(+), 21 deletions(-) diff --git a/parley/forms.py b/parley/forms.py index 04d15e3..6c238c6 100644 --- a/parley/forms.py +++ b/parley/forms.py @@ -187,8 +187,12 @@ class CreateReviewForm(forms.Form): def save(self): try: ReviewTask.objects.create( - user=User.objects.get(pk=self.cleaned_data["user"]), - evaluation=ModelEvaluation.objects.get(pk=self.cleaned_data["evaluation"]), + user=User.objects.get( + pk=self.cleaned_data["user"] + ), + model_evaluation=ModelEvaluation.objects.get( + pk=self.cleaned_data["evaluation"] + ), ) except (User.DoesNotExist, ModelEvaluation.DoesNotExist): return diff --git a/parley/migrations/0001_initial.py b/parley/migrations/0001_initial.py index 885308f..39e2dbd 100644 --- a/parley/migrations/0001_initial.py +++ b/parley/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 5.1.1 on 2024-10-07 01:54 +# Generated by Django 5.1.1 on 2024-10-07 03:15 import django.db.models.deletion import parley.models.llm @@ -704,7 +704,7 @@ class Migration(migrations.Migration): "verbose_name": "response", "verbose_name_plural": "responses", "db_table": "responses", - "ordering": ("-created",), + "ordering": ("created",), "get_latest_by": "created", }, ), @@ -830,7 +830,7 @@ class Migration(migrations.Migration): ), ), ( - "evaluation", + "model_evaluation", models.ForeignKey( help_text="The evaluation the user is performing", on_delete=django.db.models.deletion.CASCADE, @@ -858,7 +858,7 @@ class Migration(migrations.Migration): "db_table": "review_tasks", 
"ordering": ("-created",), "get_latest_by": "created", - "unique_together": {("user", "evaluation")}, + "unique_together": {("user", "model_evaluation")}, }, ), migrations.AddField( diff --git a/parley/models/llm.py b/parley/models/llm.py index 9ec3a1f..a8fa433 100644 --- a/parley/models/llm.py +++ b/parley/models/llm.py @@ -426,7 +426,7 @@ class Response(BaseModel): class Meta: db_table = "responses" - ordering = ("-created",) + ordering = ("created",) get_latest_by = "created" verbose_name = "response" verbose_name_plural = "responses" diff --git a/parley/models/user.py b/parley/models/user.py index 8e6a06c..d31d346 100644 --- a/parley/models/user.py +++ b/parley/models/user.py @@ -37,7 +37,7 @@ class ReviewTask(TimestampedModel): help_text="The user that is conducting the evaluation", ) - evaluation = models.ForeignKey( + model_evaluation = models.ForeignKey( "parley.ModelEvaluation", null=False, on_delete=models.CASCADE, @@ -63,11 +63,18 @@ class Meta: db_table = "review_tasks" ordering = ("-created",) get_latest_by = "created" - unique_together = ("user", "evaluation") + unique_together = ("user", "model_evaluation") @property - def task(self): - return self.evaluation.evaluation.task + def evaluation(self): + return self.model_evaluation.evaluation + + @property + def model(self): + return self.model_evaluation.model + + def prompts(self): + return self.model_evaluation.prompts() @property def is_started(self): @@ -79,14 +86,14 @@ def is_completed(self): @property def percent_complete(self): - n_prompts = self.evaluation.prompts().count() + n_prompts = self.prompts().count() if n_prompts == 0: return 0 n_reviews = self.response_reviews.count() return int((float(n_reviews) / float(n_prompts)) * 100) def __str__(self): - return f"{self.evaluation.evaluation.name} ({self.evaluation.model.name})" + return f"{self.evaluation.name} ({self.model.name})" def get_absolute_url(self): return reverse("review-task", args=(self.id,)) diff --git a/parley/signals.py 
b/parley/signals.py index 97f7c6a..39e46db 100644 --- a/parley/signals.py +++ b/parley/signals.py @@ -78,8 +78,8 @@ def check_review_task_completion(sender, instance, created, *args, **kwargs): changed = True if not task.completed_on: - n_prompts = task.evaluation.prompts().count() - if n_prompts == 0 or task.reviews.count() == n_prompts: + n_prompts = task.prompts().count() + if n_prompts == 0 or task.response_reviews.count() == n_prompts: task.completed_on = timezone.localtime() changed = True @@ -88,10 +88,23 @@ def check_review_task_completion(sender, instance, created, *args, **kwargs): @receiver(post_delete, sender=ResponseReview, dispatch_uid="check_review_task_unfinished") -def check_review_task_unfinished(sender, instance, created, *args, **kwargs): +def check_review_task_unfinished(sender, instance, *args, **kwargs): task = instance.review if task.completed_on: - n_prompts = task.evaluation.prompts().count() - if n_prompts > 0 and task.reviews.count() < n_prompts: + n_prompts = task.prompts().count() + if n_prompts == 0: + return + + n_reviews = task.response_reviews.count() + changed = False + + if n_reviews < n_prompts: task.completed_on = None + changed = True + + if n_reviews == 0: + task.started_on = None + changed = True + + if changed: task.save() diff --git a/parley/templatetags/parlance.py b/parley/templatetags/parlance.py index 376cc3c..aa59b8f 100644 --- a/parley/templatetags/parlance.py +++ b/parley/templatetags/parlance.py @@ -46,6 +46,6 @@ def alert_level(level): @register.simple_tag() def get_review_task(user, evaluation): try: - return ReviewTask.objects.get(user=user, evaluation=evaluation) + return ReviewTask.objects.get(user=user, model_evaluation=evaluation) except ReviewTask.DoesNotExist: return None diff --git a/templates/site/dashboard.html b/templates/site/dashboard.html index a34e865..fb6820e 100644 --- a/templates/site/dashboard.html +++ b/templates/site/dashboard.html @@ -91,10 +91,10 @@

    Edit in CMS - + View Evaluation - + View Model