Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

import course staff management command #255

Merged
merged 1 commit into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Empty file.
93 changes: 93 additions & 0 deletions edx_exams/apps/core/management/commands/bulk_add_course_staff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""Management command to populate the CourseStaffRole model and related User objects, from LMS, using CSV"""
import csv
import logging
import time

from django.core.management.base import BaseCommand
from django.db import transaction

from edx_exams.apps.core.models import CourseStaffRole, User

logger = logging.getLogger(__name__)


class Command(BaseCommand):
"""
Management command to add Course Staff (and User) in batches from CSV
"""
help = """
Add Course Staff in bulk from CSV.
Expects that the data will be provided in a csv file format with the first row
being the header and columns being: username, email, role, course_id.
Examples:
$ ... bulk_add_course_staff --csv_path=foo.csv
$ ... bulk_add_course_staff --csv_path=foo.csv --batch_size=100 --batch_delay=2
"""

def add_arguments(self, parser):
parser.add_argument(
varshamenon4 marked this conversation as resolved.
Show resolved Hide resolved
'-p', '--csv_path',
metavar='csv_path',
dest='csv_path',
required=True,
help='Path to CSV file.')
parser.add_argument(
'--batch_size',
type=int,
default=200,
dest='batch_size',
help='Batch size')
parser.add_argument(
'--batch_delay',
type=float,
default=1.0,
dest='batch_delay',
help='Time delay in seconds for each batch')

@transaction.atomic
def handle(self, *args, **options):
"""
The main logic and entry point of the management command
"""
csv_path = options['csv_path']
batch_size = options['batch_size']
batch_delay = options['batch_delay']

with open(csv_path, 'r') as csv_file:
self.add_course_staff_from_csv(csv_file, batch_size, batch_delay)

logger.info('Bulk add course staff complete!')

def add_course_staff_from_csv(self, csv_file, batch_size, batch_delay):
"""
Add the given set of course staff provided in csv
"""
reader = list(csv.DictReader(csv_file))
users_to_create = []
users_existing = {u.username for u in User.objects.filter(username__in=[r.get('username') for r in reader])}
varshamenon4 marked this conversation as resolved.
Show resolved Hide resolved
for row in reader:
if row.get('username') not in users_existing:
users_to_create.append(row)

# bulk create users
for i in range(0, len(users_to_create), batch_size):
User.objects.bulk_create(
User(
username=user.get('username'),
email=user.get('email'),
)
for user in users_to_create[i:i + batch_size]
)
time.sleep(batch_delay)

# bulk create course staff
for i in range(0, len(reader), batch_size):
CourseStaffRole.objects.bulk_create(
CourseStaffRole(
user=User.objects.get(username=row.get('username')),
course_id=row.get('course_id'),
role=row.get('role'),
)
for row in reader[i:i + batch_size]
)
time.sleep(batch_delay)
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
""" Tests for bulk_add_course_staff management command """
from tempfile import NamedTemporaryFile

from django.core.management import call_command
from django.test import TestCase

from edx_exams.apps.core.models import CourseStaffRole, User
from edx_exams.apps.core.test_utils.factories import UserFactory


class TestBulkAddCourseStaff(TestCase):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be worth adding a test here that ensures this command produces the expected number or queries w/ assertNumQueries. example:

with self.assertNumQueries(4):

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense! When I used this method, I see that there are a number of calls: https://github.com/edx/edx-exams/actions/runs/8394389300/job/22991407617?pr=255. Is it better to just check relative number of queries (for example, that batch size affects the number of queries for example) or is there a better way to finetune? I couldn't figure out a good way to constrain to only a certain type of query (for example, just the bulk query). Was poking around here: https://docs.djangoproject.com/en/5.0/topics/testing/tools/

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can probably just make this it's own test rather than baking it into the others as an extra assertion. For that one I'd try a bunch of extra rows to ensure the query count has not increased in an unexpected way based on more data.

For getting the expected number you should be able to walk though the code and count things up for what you expect. If it's different, it would be good to know why or where the extra query is coming from. There's a few ways to debug django queries but an easy one is to use

from django.db import connection
print(connection.queries)

""" Test bulk_add_course_staff management command """

def setUp(self):
super().setUp()
self.command = 'bulk_add_course_staff'
self.success_log_message = 'Bulk add course staff complete!'

# create existing user
self.user = UserFactory.create(
username='amy',
is_active=True,
is_staff=True,
)
self.user.email = '[email protected]'
self.user.save()

self.course_id = 'course-v1:edx+test+f19'
self.course_role = 'staff'

def _write_test_csv(self, csv, lines):
""" Write a test csv file with the lines provided """
csv.write(b'username,email,role,course_id\n')
for line in lines:
csv.write(line.encode())
csv.seek(0)
return csv

def _assert_user_and_role(self, username, email, course_role, course_id):
""" Helper that asserts that User and CourseStaffRole are created """
user = User.objects.filter(username=username, email=email)
assert user.exists()
assert CourseStaffRole.objects.filter(
user=user[0].id,
course_id=course_id,
role=course_role,
).exists()

def test_empty_csv(self):
lines = []
with NamedTemporaryFile() as csv:
csv = self._write_test_csv(csv, lines)
call_command(self.command, f'--csv_path={csv.name}')
varshamenon4 marked this conversation as resolved.
Show resolved Hide resolved
assert not CourseStaffRole.objects.filter(
user=self.user.id,
course_id=self.course_id,
role=self.course_role,
).exists()

def test_add_course_staff_with_existing_user(self):
lines = [f'{self.user.username},{self.user.email},{self.course_role},{self.course_id}\n']
with NamedTemporaryFile() as csv:
csv = self._write_test_csv(csv, lines)
call_command(self.command, f'--csv_path={csv.name}')
self._assert_user_and_role(self.user.username, self.user.email, self.course_role, self.course_id)

def test_add_course_staff_with_new_user(self):
username, email = 'pam', '[email protected]'
lines = [f'{username},{email},{self.course_role},{self.course_id}\n']
with NamedTemporaryFile() as csv:
csv = self._write_test_csv(csv, lines)
call_command(self.command, f'--csv_path={csv.name}')
self._assert_user_and_role(username, email, self.course_role, self.course_id)

def test_add_course_staff_multiple(self):
""" Assert that the course staff role is correct given multiple lines """
username, email = 'pam', '[email protected]'
username2, email2 = 'cam', '[email protected]'
lines = [f'{username},{email},{self.course_role},{self.course_id}\n',
f'{username2},{email2},{self.course_role},{self.course_id}\n']
with NamedTemporaryFile() as csv:
csv = self._write_test_csv(csv, lines)
call_command(self.command, f'--csv_path={csv.name}')
self._assert_user_and_role(username, email, self.course_role, self.course_id)
self._assert_user_and_role(username2, email2, self.course_role, self.course_id)

def test_add_course_staff_with_not_default_batch_size(self):
""" Assert that the number of queries is correct given 2 batches """
lines = ['pam,[email protected],staff,course-v1:edx+test+f20\n',
'sam,[email protected],staff,course-v1:edx+test+f20\n']
with NamedTemporaryFile() as csv:
csv = self._write_test_csv(csv, lines)
with self.assertNumQueries(9):
call_command(self.command, f'--csv_path={csv.name}', '--batch_size=1')

def test_add_course_staff_with_not_default_batch_delay(self):
username, email = 'pam', '[email protected]'
username2, email2 = 'cam', '[email protected]'
lines = [f'{username},{email},{self.course_role},{self.course_id}\n',
f'{username2},{email2},{self.course_role},{self.course_id}\n']
with NamedTemporaryFile() as csv:
csv = self._write_test_csv(csv, lines)
call_command(self.command, f'--csv_path={csv.name}', '--batch_size=1', '--batch_delay=2')
self._assert_user_and_role(username, email, self.course_role, self.course_id)
self._assert_user_and_role(username2, email2, self.course_role, self.course_id)

def test_num_queries_correct(self):
"""
Assert the number of queries to be 5 + 1 * number of lines:
2 for savepoint/release savepoint, 1 to get existing usernames,
1 to bulk create users, 1 to bulk create course role
1 for each user (to get user)
"""
num_lines = 20
lines = [f'pam{i},pam{i}@pond.com,staff,course-v1:edx+test+f20\n' for i in range(num_lines)]
with NamedTemporaryFile() as csv:
csv = self._write_test_csv(csv, lines)
with self.assertNumQueries(5 + num_lines):
call_command(self.command, f'--csv_path={csv.name}')
Loading