diff --git a/.gitignore b/.gitignore index cd6949f7..df7cd102 100644 --- a/.gitignore +++ b/.gitignore @@ -125,7 +125,7 @@ celerybeat.pid *.sage.py # Environments -.env +*.env .venv env/ venv/ diff --git a/backend/api/urls.py b/backend/api/urls.py index ff272fb8..d9ebce91 100644 --- a/backend/api/urls.py +++ b/backend/api/urls.py @@ -1,10 +1,14 @@ from django.urls import path from users.views import login, register_user +from documents.views import upload_pdf +from flashcards.views import generate_mock_flashcard from documents.views import create_flashcards urlpatterns = [ path("create-user/", register_user, name="create-user"), path("login/", login, name="login"), + path("upload/", upload_pdf, name="upload"), + path("generate-mock-flashcard/", generate_mock_flashcard, name="generate-mock-flashcards"), path("create-flashcards/", create_flashcards, name="create-flashcards"), ] diff --git a/backend/config.py b/backend/config.py new file mode 100644 index 00000000..7868220c --- /dev/null +++ b/backend/config.py @@ -0,0 +1,18 @@ +import os +from dotenv import load_dotenv + +""" +This module provides the Config class to manage configuration variables +from environment files. It also supports fetching test cases +if the configuration is loaded from a test environment file (NOTE(review): no such code is visible in this module). +""" + +# Holds the config variables: the GPT model name and the OpenAI API key read from a dotenv file +class Config(): + def __init__(self, path='config.env', gpt_model="gpt-3.5-turbo"): + self.path = path + self.GPT_MODEL = gpt_model + load_dotenv(dotenv_path=path) + self.API_KEY = os.getenv('OPENAI_API_KEY') + + diff --git a/backend/flashcards/__init__.py b/backend/flashcards/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/flashcards/admin.py b/backend/flashcards/admin.py new file mode 100644 index 00000000..3c33bec3 --- /dev/null +++ b/backend/flashcards/admin.py @@ -0,0 +1,7 @@ +from django.contrib import admin + +from flashcards.models import Flashcard, Cardset + +# Register your models here. 
+admin.site.register(Flashcard) +admin.site.register(Cardset) \ No newline at end of file diff --git a/backend/flashcards/apps.py b/backend/flashcards/apps.py new file mode 100644 index 00000000..681fc385 --- /dev/null +++ b/backend/flashcards/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class FlashcardsConfig(AppConfig): + default_auto_field = 'django.db.models.BigAutoField' + name = 'flashcards' diff --git a/backend/flashcards/convert_pdf_to_txt.py b/backend/flashcards/convert_pdf_to_txt.py new file mode 100644 index 00000000..f7dfedb4 --- /dev/null +++ b/backend/flashcards/convert_pdf_to_txt.py @@ -0,0 +1,15 @@ +import os +from pdfminer.high_level import extract_text + +def convert_pdf_to_txt(pdf_file): + """Extract the text of a PDF file and return it (no text file is written). + + Args: + pdf_file (str/pdf): Path to the PDF file or the PDF file itself. + + Returns: + str: Text content of PDF file. + """ + # pdfminer's extract_text does the whole conversion; its result is returned unchanged + text = extract_text(pdf_file, codec='utf-8') + return text diff --git a/backend/flashcards/models.py b/backend/flashcards/models.py new file mode 100644 index 00000000..f2b3a7f9 --- /dev/null +++ b/backend/flashcards/models.py @@ -0,0 +1,27 @@ +from django.db import models + +# Create your models here. 
+ +class Flashcard(models.Model): + """A single flashcard (front/back text) that belongs to one Cardset and is deleted with it (CASCADE)""" + id = models.AutoField(primary_key=True) + front = models.TextField(help_text="The front of the flashcard") + back = models.TextField(help_text="The back of the flashcard") + cardset = models.ForeignKey( + "Cardset", on_delete=models.CASCADE, help_text="The cardset to which the flashcard belongs" + ) + + def __str__(self): + return self.front + +class Cardset(models.Model): + """A named, described set of flashcards owned by one user and deleted with that user (CASCADE)""" + id = models.AutoField(primary_key=True) + name = models.CharField(max_length=100, help_text="The name of the cardset") + description = models.TextField(help_text="The description of the cardset") + user = models.ForeignKey( + "users.User", on_delete=models.CASCADE, help_text="The user to which the cardset belongs" + ) + + def __str__(self): + return self.name \ No newline at end of file diff --git a/backend/flashcards/test.pdf b/backend/flashcards/test.pdf new file mode 100644 index 00000000..8b6622bd Binary files /dev/null and b/backend/flashcards/test.pdf differ diff --git a/backend/flashcards/tests.py b/backend/flashcards/tests.py new file mode 100644 index 00000000..8cd97e23 --- /dev/null +++ b/backend/flashcards/tests.py @@ -0,0 +1,86 @@ +from django.test import TestCase, Client +from django.core.files.uploadedfile import SimpleUploadedFile + +from flashcards.convert_pdf_to_txt import convert_pdf_to_txt + +import os +from flashcards.models import Cardset, Flashcard +from users.models import User + +# Create your tests here. 
+ + +base = "/api/" + +class ConvertPdfTest(TestCase): + + def setUp(self) -> None: + self.pdf_file_path = os.path.join(os.path.dirname(__file__), 'test.pdf') + + def test_convert_pdf(self): + # Convert the bundled test.pdf (located next to this test module) to text + text = convert_pdf_to_txt(self.pdf_file_path) + # Only the type is checked here, not the extracted content + self.assertIsInstance(text, str) + +class GetFlashcardTest(TestCase): + + def setUp(self) -> None: + self.client = Client() + self.url = base + "generate-mock-flashcard/" + + def test_get_flashcards(self): + response = self.client.get(self.url) + self.assertEqual(response.status_code, 200) + + def test_get_flashcards_format(self): + response = self.client.get(self.url) + self.assertEqual(response['Content-Type'], "application/json") + # Check that response is a list (NOTE(review): this GET runs generate_flashcards, i.e. an unmocked OpenAI request) + self.assertIsInstance(response.json(), list) + # Check that the first response item has the "front" and "back" keys + self.assertIn("front", response.json()[0]) + self.assertIn("back", response.json()[0]) + +class testPersistantFlashcard(TestCase): + + def setUp(self): + # Runs before each test: create the user that owns the cardsets + self.user = User.objects.create_user(username="testuser", password="12345") + self.valid_username = "username" + + + def tearDown(self): + # Runs after each test: remove the user created in setUp + self.user.delete() + + def test_persistant_cardset(self): + self.assertTrue(Cardset.objects.count() == 0) + + self.cardset = Cardset.objects.create(name="testcardset", description="testcardset", user=self.user) + self.assertTrue(Cardset.objects.count() == 1) + + self.cardset.delete() + + def test_persistant_flashcard(self): + self.assertTrue(Flashcard.objects.count() == 0) + + self.cardset = Cardset.objects.create(name="testcardset", description="testcardset", user=self.user) + self.card1 = Flashcard.objects.create(front="testfront", back="testback", cardset=self.cardset) + self.card2 = Flashcard.objects.create(front="testfront2", back="testback2", cardset=self.cardset) + self.assertTrue(Flashcard.objects.count() == 2) + + 
self.cardset.delete() + self.card1.delete() + self.card2.delete() + + def test_get_flashcards_from_cardset(self): + self.cardset = Cardset.objects.create(name="testcardset", description="testcardset", user=self.user) + self.assertTrue(self.cardset.flashcard_set.all().count() == 0) + + self.card1 = Flashcard.objects.create(front="testfront", back="testback", cardset=self.cardset) + self.card2 = Flashcard.objects.create(front="testfront2", back="testback2", cardset=self.cardset) + self.assertTrue(self.cardset.flashcard_set.all().count() == 2) + + + \ No newline at end of file diff --git a/backend/flashcards/textToFlashcards.py b/backend/flashcards/textToFlashcards.py new file mode 100644 index 00000000..5b3ac441 --- /dev/null +++ b/backend/flashcards/textToFlashcards.py @@ -0,0 +1,95 @@ +import openai +from config import Config + +api_key = Config().API_KEY + +openai.api_key = api_key +sample_info = "Cristiano Ronaldo dos Santos Aveiro GOIH ComM (Portuguese pronunciation: [kɾiʃˈtjɐnu ʁɔˈnaldu]; born 5 February 1985) is a Portuguese professional footballer who plays as a forward for and captains both Saudi Pro League club Al Nassr and the Portugal national team. Widely regarded as one of the greatest players of all-time, Ronaldo has won five Ballon d'Or awards,[note 3] a record three UEFA Men's Player of the Year Awards, and four European Golden Shoes, the most by a European player." + + +def request_chat_completion(previous_message: dict, role: str = "system", message: str = "", functions: list = None) -> str: + """ + Returns the text of a chat completion from the OpenAI API + + Args: + previous_message (dict): The previous message in the conversation, or None to start a new one + role (str, optional): The role of the message ("system", "user" or "assistant"). Defaults to "system". + message (str, optional): The message to be sent. Defaults to "". + functions (list, optional): The functions to be used; only sent together with previous_message. Defaults to None. 
+ + Returns: + str: The response text from the OpenAI API; + an empty string means the role was invalid or an error has occurred + """ + try: + if role not in ("system", "user", "assistant"): + print("Invalid role") + return "" + + messages = [{"role": role, "content": message}] + request_kwargs = {"model": "gpt-4", "messages": messages} + if previous_message: + messages.insert(0, previous_message) + # Only forward `functions` when some were supplied + if functions: + request_kwargs["functions"] = functions + response = openai.chat.completions.create(**request_kwargs) + # content may be None (e.g. a function-call reply); keep the str contract + return response.choices[0].message.content or "" + + except Exception as error: + print(f"An error has occured while requesting chat completion.") + print(f"The error: {str(error)}") + return "" + +def generate_template(sample_info: str) -> str: + """ + Returns a template with the correct flashcard and prompt format which can be used to generate flashcards using the sample text + """ + example_flashcard = "What is the capital of France? - Paris | Why is is coffe good? - Because it is tasty. | Who was the first man on the moon - Lance Armstrong" + template = f"Generate a set flashcard with this format {example_flashcard} about the most important part of this sample text: {sample_info}. Use only information from the sample text. Use only the format given. 
" + + return template + +def generate_flashcards(sample_info: str = sample_info) -> list[str]: + """ + Returns the raw flashcard strings generated from the sample text + + Args: + sample_info (str): The sample text to be used + + Returns: + list[str]: The non-blank '|'-separated pieces of the response (intended as "front - back" strings); empty if the request failed + """ + template = generate_template(sample_info) + + result = request_chat_completion(None, 'system', template) + # "" marks a failed request; dropping blank segments turns it into [] + return [card for card in result.split('|') if card.strip()] + + +def parse_flashcard(flashcards_data: list[str]) -> list[dict[str, str]]: + """ + Returns a list of dictionaries with the front and back of the flashcard + + Args: + flashcards_data (list[str]): The flashcard to be parsed + + Returns: + list[dict[str, str]]: A list of dictionaries with the front and back of the flashcard; entries without a separator are skipped + + example: + [{"front": "What is the capital of the USA?", "back": "Washington DC"}, {"front": "What is the capital of France?", "back": "Paris"}] + + """ + parsed_cards = [] + for card in flashcards_data: + # Prefer the spaced " - " delimiter from the prompt so hyphenated words survive + separator = ' - ' if ' - ' in card else '-' + front, found, back = card.partition(separator) + if not found: + # No separator at all: not a usable flashcard, skip it + continue + parsed_cards.append({"front": front.strip(), "back": back.strip()}) + + return parsed_cards diff --git a/backend/flashcards/views.py b/backend/flashcards/views.py new file mode 100644 index 00000000..0faded8f --- /dev/null +++ b/backend/flashcards/views.py @@ -0,0 +1,37 @@ +from django.shortcuts import render +from flashcards.textToFlashcards import generate_flashcards, parse_flashcard + +from rest_framework.decorators import api_view +from rest_framework.response import Response +from rest_framework import status + +from drf_yasg.utils import swagger_auto_schema +from drf_yasg import openapi + +#Flashcard view +get_mock_flashcard_error_response = openapi.Response( + description="Error generating flashcards", + examples={"application/json": {"message": "Error generating flashcards"}}, +) + +get_mock_flashcard_success_response = openapi.Response( + description="Flashcards generated successfully", + examples={"application/json": 
[{"front": "What is the capital of India?", "back": "New Delhi"}]}, +) + +@swagger_auto_schema( + method="get", + operation_description="Generate flashcards from a given text", + tags=["Flashcards"], + responses={200: get_mock_flashcard_success_response, 400: get_mock_flashcard_error_response}, +) +@api_view(["GET"]) +def generate_mock_flashcard(request): + """Return the generated flashcards as JSON, or a 400 error when generation produced no card text.""" + raw_cards = generate_flashcards() + if not any(card.strip() for card in raw_cards): # nothing usable came back, so skip parsing + return Response( + {"message": "Error generating flashcards"}, status=status.HTTP_400_BAD_REQUEST + ) + flashcards = parse_flashcard(raw_cards) + return Response(flashcards, status=status.HTTP_200_OK) diff --git a/backend/requirements.txt b/backend/requirements.txt index af3e81a0..a07b40f9 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,17 +1,38 @@ +annotated-types==0.6.0 +anyio==4.2.0 asgiref==3.7.2 +certifi==2023.11.17 +cffi==1.16.0 +charset-normalizer==3.3.2 +colorama==0.4.6 +cryptography==41.0.7 +distro==1.9.0 Django==5.0.1 django-cors-headers==4.3.1 django-rest-framework==0.1.0 djangorestframework==3.14.0 djangorestframework-simplejwt==5.3.1 drf-yasg==1.21.7 +h11==0.14.0 +httpcore==1.0.2 +httpx==0.26.0 +idna==3.6 inflection==0.5.1 +openai==1.6.1 packaging==23.2 +pdfminer.six==20231228 psycopg2==2.9.9 +pycparser==2.21 +pydantic==2.5.3 +pydantic_core==2.14.6 PyJWT==2.8.0 +PyPDF2==1.26.0 +python-dotenv==1.0.0 pytz==2023.3.post1 PyYAML==6.0.1 +sniffio==1.3.0 sqlparse==0.4.4 +tqdm==4.66.1 typing_extensions==4.9.0 tzdata==2023.4 uritemplate==4.1.1 diff --git a/backend/tutorai/settings.py b/backend/tutorai/settings.py index 682ceb19..5211890f 100644 --- a/backend/tutorai/settings.py +++ b/backend/tutorai/settings.py @@ -46,6 +46,7 @@ "api", "users", "documents", + "flashcards", ] MIDDLEWARE = [ diff --git a/backend/users/models.py b/backend/users/models.py index b2dcd8a3..6a78f91f 100644 --- a/backend/users/models.py +++ b/backend/users/models.py @@ -15,6 +15,6 @@ class User(AbstractUser): auto_now=True, help_text="Last time 
the user fulfilled the daily task" ) streak_count = models.IntegerField(default=0, help_text="Current streak count") - + def __str__(self): return self.username