Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stress Testing Throughput for Club Submission Applications #606

Draft
wants to merge 12 commits into
base: master
Choose a base branch
from
173 changes: 173 additions & 0 deletions backend/clubs/management/commands/stress_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
import asyncio
import datetime
import logging
import random
import time

from asgiref.sync import sync_to_async
from django.contrib.auth import get_user_model
from django.core.management.base import BaseCommand
from django.db.models import Prefetch
from django.utils import timezone
from rest_framework.test import APIRequestFactory

from clubs.models import ApplicationQuestion, Club, ClubApplication
from clubs.views import UserViewSet


# Management command that creates throwaway clubs, applications, questions and
# users (setUp), times application submissions against them (handleAsync), and
# deletes the created clubs and users again (tearDown).
class Command(BaseCommand):
help = """
Runs stress tests on Penn Clubs database to assess performance
upgrades on club submissions.
"""

def setUp(self):
# Sizing knobs for the run: number of clubs and users to create, how many
# clubs each user applies to (subset_size), questions per application, and
# how many times each (user, club) pair is submitted (total_submissions).
self.num_clubs = 10
self.num_users = 10
self.subset_size = 1
self.num_questions_per_club = 2
self.total_submissions = 1
# Prefixes mark the rows created here; the club prefix is also what
# tearDown filters on when deleting.
self.club_prefix = "test_club_"
self.user_prefix = "test_user_"

# Requests are built with APIRequestFactory and handed directly to the view
# callable, so no HTTP server is involved.
self.uri = "/users/question_response/"
self.factory = APIRequestFactory()
self.view = UserViewSet.as_view({"post": "question_response"})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will create objects for real right? So we want to tear down?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah the tearDown() function handles that, but if it needs to be more explicit I can do that.


# club id -> list of that club's question ids (as strings); filled in below.
self.club_question_ids = {}
self.users = []
now = timezone.now()

# Create Clubs
clubs = [
Club(code=(self.club_prefix + str(i)), name=(f"Test Club {i}"))
for i in range(self.num_clubs)
]
Club.objects.bulk_create(clubs)
# Re-query the rows just created, selected by the test code prefix.
# NOTE(review): presumably done so the instances carry database primary
# keys regardless of backend -- confirm.
clubs = Club.objects.filter(code__startswith=self.club_prefix)

# Create Club Applications
# Each application window opened one day ago and closes in three days.
applications = [
ClubApplication(
name="Test Application",
club=club,
application_start_time=now - datetime.timedelta(days=1),
application_end_time=now + datetime.timedelta(days=3),
result_release_time=now + datetime.timedelta(weeks=1),
external_url="https://pennlabs.org/",
)
for club in clubs
]
ClubApplication.objects.bulk_create(applications)
applications = ClubApplication.objects.filter(club__in=clubs)

# Create Club Application Questions
# num_questions_per_club free-response questions for every application.
questions = [
ApplicationQuestion(
question_type=ApplicationQuestion.FREE_RESPONSE,
prompt="Answer the prompt you selected",
word_limit=150,
application=application,
)
for _ in range(self.num_questions_per_club)
for application in applications
]
ApplicationQuestion.objects.bulk_create(questions)

# Load the test clubs with their applications and questions prefetched, then
# record each club's question ids for use when building submissions.
clubs_data = Club.objects.filter(
code__startswith=self.club_prefix
).prefetch_related(
Prefetch(
"clubapplication_set",
queryset=ClubApplication.objects.prefetch_related("questions"),
)
)
for club in clubs_data:
question_ids = [
str(question.id)
for application in club.clubapplication_set.all()
for question in application.questions.all()
]
self.club_question_ids[club.id] = question_ids
print("Finished setting up clubs.")

# Create Users (Applicants)
# NOTE(review): the password is passed to the model constructor as a plain
# string; presumably it is never used to log in because submit_application
# sets request.user directly -- confirm.
User = get_user_model()
User.objects.bulk_create(
[
User(
username=self.user_prefix + str(i),
email=str(i) + "@upenn.edu",
password="test",
)
for i in range(self.num_users)
]
)
# Keep the created users in memory; handleAsync and tearDown iterate this list.
self.users = list(User.objects.filter(username__startswith=self.user_prefix))
print("Finished setting up users.")

# Wraps the synchronous method below so that calling it yields an awaitable
# that handleAsync can schedule with asyncio.create_task.
# NOTE(review): with asgiref's default thread_sensitive=True the wrapped calls
# presumably run one at a time on a single thread rather than in parallel --
# confirm before reading the timings as concurrent throughput.
@sync_to_async
Copy link
Member

@rohangpta rohangpta Jan 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This decorator is misleading. All it does is take your function and convert it into an awaitable. It unfortunately doesn't make your code "async for free". In fact, with thread_sensitive=True (which is the default behaviour), I believe these functions will all run in the main thread, and probably even slower than usual since they go through the overhead of "asyncifying". There is some more information here.

It isn't worth changing it back since this is just a test, but I thought I should let you know about this behaviour. If you wanted better async, you might want to use httpx or something like that (I think newer versions of Django support async more natively).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I was starting to notice that in the end. I originally was designing it as sending HTTP requests and was going to use httpx but then the mocking seemed like a better option. Thanks for sharing!

def submit_application(self, user, club_id):
    """Submit one application for ``user`` to club ``club_id`` and time it.

    Builds a JSON POST whose body lists the club's question ids under
    ``"questionIds"`` and carries one canned text answer per question id,
    attaches ``user`` to the request, and calls the view directly.

    Args:
        user: Object assigned to ``request.user`` for the submission.
        club_id: Key into ``self.club_question_ids`` selecting the club.

    Returns:
        Elapsed time in seconds as a float, covering payload construction
        and the view call.

    Raises:
        KeyError: If ``club_id`` is not present in ``self.club_question_ids``.
    """
    # perf_counter is monotonic and high resolution, so the measured duration
    # cannot go negative or jump if the system clock is adjusted mid-run.
    start_time = time.perf_counter()

    question_ids = self.club_question_ids[club_id]
    data = {"questionIds": question_ids}
    for question_id in question_ids:
        data[question_id] = {"text": "This is a test answer."}

    request = self.factory.post(self.uri, data, format="json")
    request.user = user

    # NOTE: the view's response is discarded, so a submission the view rejects
    # is timed exactly like a successful one.
    self.view(request)
    return time.perf_counter() - start_time

def tearDown(self):
# Delete every club whose code starts with the test prefix set in setUp.
# NOTE(review): applications, questions and submissions hanging off these
# clubs are presumably removed through on_delete cascades -- confirm against
# the models.
Club.objects.filter(code__startswith=self.club_prefix).delete()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might have stray ClubApplication, ApplicationSubmission, etc objects which you'll need to tear down

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Checking from the models, this should all be handled with on_delete behavior? It should form separate "trees" with roots of Clubs and Users.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, this makes sense. Please do double check empirically though; we should definitely avoid excess footprint on the DB, especially if we test in prod multiple times.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah just double checked, no rows are left over. The only concern I guess could be the autoincrementing index and offsetting that by the number of test instances. Should not be an issue at all, but if we want full isolation, we could create a separate database on the same instance.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be fine! Thank you for double checking

# Delete the users held in self.users one at a time.
for user in self.users:
user.delete()

async def handleAsync(self, *args, **kwargs):
    """Run the stress test and print timing statistics.

    Every user in ``self.users`` applies to ``self.subset_size`` randomly
    sampled clubs, ``self.total_submissions`` times each. The resulting
    (user, club id) pairs are shuffled and each one is submitted as an
    asyncio task via ``self.submit_application``. The random generator is
    seeded with 0 so the same pairs are produced on every run.

    Submissions that raise are counted and reported separately and are
    excluded from the average, so a single failure no longer aborts the
    report. A run with no successful submissions prints a notice instead of
    dividing by zero.

    Args:
        *args: Accepted for compatibility with ``handle``; unused.
        **kwargs: Accepted for compatibility with ``handle``; unused.

    Raises:
        ValueError: If ``self.subset_size`` exceeds the number of clubs
            (raised by ``random.sample``).
    """
    random.seed(0)

    # Performance Testing!
    # Randomly choose a subset of clubs for each user (size: subset_size),
    # then apply to each chosen club total_submissions times, in random order.
    club_keys = list(self.club_question_ids.keys())
    user_application_pairs = []
    for user in self.users:
        for club_id in random.sample(club_keys, self.subset_size):
            user_application_pairs.extend(
                [(user, club_id)] * self.total_submissions
            )
    random.shuffle(user_application_pairs)
    print("Finished generating and shuffling pairs.")

    print("Starting Stress Test.")
    start_time = time.perf_counter()
    tasks = [
        asyncio.create_task(self.submit_application(user, club_id))
        for user, club_id in user_application_pairs
    ]
    # return_exceptions=True keeps one failing submission from cancelling the
    # rest; failures come back as exception objects mixed in with the timings.
    all_tasks = await asyncio.gather(*tasks, return_exceptions=True)
    print(all_tasks)
    end_time = time.perf_counter()

    durations = [
        result for result in all_tasks if not isinstance(result, BaseException)
    ]
    failures = len(all_tasks) - len(durations)
    if failures:
        print(f"{failures} of {len(all_tasks)} submissions raised an exception.")
    if durations:
        # This is the mean time per successful submission.
        print(
            f"Throughput was: {sum(durations) / len(durations)} seconds per txn."
        )
    else:
        print("No submissions completed; throughput is unavailable.")
    print(f"Total processing time was: {end_time - start_time} seconds.")

def handle(self, *args, **kwargs):
    """Command entry point: create test data, run the stress test, clean up.

    ``tearDown`` runs exactly once in a ``finally`` clause, so the test rows
    are removed whether the run succeeds, fails, is interrupted (for example
    with Ctrl-C), or ``setUp`` itself fails part-way through.

    Exceptions raised by the stress test are printed and logged rather than
    propagated, as before. Exceptions raised by ``setUp`` still propagate to
    the caller after cleanup has been attempted.

    Args:
        *args: Positional arguments, forwarded unchanged to ``handleAsync``.
        **kwargs: Keyword arguments, forwarded unchanged to ``handleAsync``.
    """
    try:
        self.setUp()
        try:
            # Unpack so handleAsync receives the same arguments as handle,
            # instead of one tuple and one dict as two positional values.
            asyncio.run(self.handleAsync(*args, **kwargs))
        except Exception as e:
            print(e)
            logging.exception("Something happened!")
    finally:
        self.tearDown()
Loading