Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: take exec ed course data from course run instead of additional_metadata attempt 3 #966

Merged
merged 1 commit into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions enterprise_catalog/apps/api/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from enterprise_catalog.apps.catalog.models import CatalogQuery, ContentMetadata
from enterprise_catalog.apps.catalog.serializers import (
DEFAULT_NORMALIZED_PRICE,
NormalizedContentMetadataSerializer,
_find_best_mode_seat,
)
from enterprise_catalog.apps.catalog.tests.factories import (
Expand Down Expand Up @@ -55,6 +56,37 @@ def mock_task(self, *args, **kwargs): # pylint: disable=unused-argument
mock_task.name = 'mock_task'


def _hydrate_normalized_metadata(metadata_record):
    """
    Populate the ``normalized_metadata`` and ``normalized_metadata_by_run``
    fields on the given ContentMetadata record's ``json_metadata``.

    The course-level entry is derived from the course metadata alone; the
    per-run entries pair each course run with the course metadata.
    """
    json_metadata = metadata_record.json_metadata
    json_metadata['normalized_metadata'] = NormalizedContentMetadataSerializer(
        {'course_metadata': json_metadata}
    ).data
    json_metadata['normalized_metadata_by_run'] = {
        run['key']: NormalizedContentMetadataSerializer({
            'course_run_metadata': run,
            'course_metadata': json_metadata,
        }).data
        for run in json_metadata.get('course_runs', [])
    }


def _hydrate_course_normalized_metadata():
    """
    Populate normalized_metadata fields for all course ContentMetadata.

    Needed for tests that generate test ContentMetadata, which does not have
    normalized_metadata populated by default.
    """
    for course_record in ContentMetadata.objects.filter(content_type=COURSE):
        _hydrate_normalized_metadata(course_record)
        course_record.save()


@ddt.ddt
class TestTaskResultFunctions(TestCase):
"""
Expand Down Expand Up @@ -827,6 +859,8 @@ def setUp(self):
self.course_run_metadata_unpublished.catalog_queries.set([course_run_catalog_query])
self.course_run_metadata_unpublished.save()

_hydrate_course_normalized_metadata()

def _set_up_factory_data_for_algolia(self):
expected_catalog_uuids = sorted([
str(self.enterprise_catalog_courses.uuid),
Expand Down Expand Up @@ -1026,6 +1060,7 @@ def test_index_algolia_program_common_uuids_only(self, mock_search_client):
test_course_1.save()
test_course_2.save()
test_course_3.save()
_hydrate_course_normalized_metadata()

actual_algolia_products_sent = []

Expand Down Expand Up @@ -1124,6 +1159,7 @@ def test_index_algolia_program_unindexable_content(self, mock_search_client):
test_course_1.save()
test_course_2.save()
test_course_3.save()
_hydrate_course_normalized_metadata()

actual_algolia_products_sent = []

Expand Down Expand Up @@ -2138,6 +2174,7 @@ def test_index_algolia_duplicate_content_uuids(self, mock_search_client):
)
course_run_for_duplicate = ContentMetadataFactory(content_type=COURSE_RUN, parent_content_key='duplicateX')
course_run_for_duplicate.catalog_queries.set([self.enterprise_catalog_course_runs.catalog_query])
_hydrate_course_normalized_metadata()

actual_algolia_products_sent_sequence = []

Expand Down
206 changes: 98 additions & 108 deletions enterprise_catalog/apps/api/v1/export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,69 +170,102 @@ def program_hit_to_row(hit):
return csv_row


def course_hit_to_row(hit):
"""
Helper function to construct a CSV row according to a single Algolia result course hit.
"""
csv_row = []
csv_row.append(hit.get('title'))

if hit.get('partners'):
csv_row.append(hit['partners'][0]['name'])
else:
csv_row.append(None)
def _base_csv_row_data(hit):
    """
    Return the formatted, shared attributes common across all course types.

    Args:
        hit (dict): a single Algolia search result hit for a course.

    Returns:
        dict: formatted values keyed by column name, ready to be appended
        to a CSV row by the per-course-type row builders.
    """
    title = hit.get('title')
    aggregation_key = hit.get('aggregation_key')
    language = hit.get('language')
    transcript_languages = ', '.join(hit.get('transcript_languages', []))
    marketing_url = hit.get('marketing_url')
    short_description = strip_tags(hit.get('short_description', ''))
    subjects = ', '.join(hit.get('subjects', []))
    skills = ', '.join([skill['name'] for skill in hit.get('skills', [])])
    outcome = strip_tags(hit.get('outcome', ''))  # What You'll Learn

    # FIXME: currently ignores partner names when a course has multiple partners
    partner_name = hit['partners'][0]['name'] if hit.get('partners') else None

    empty_advertised_course_run = {}
    advertised_course_run = hit.get('advertised_course_run', empty_advertised_course_run)
    advertised_course_run_key = advertised_course_run.get('key')
    min_effort = advertised_course_run.get('min_effort')
    max_effort = advertised_course_run.get('max_effort')
    weeks_to_complete = advertised_course_run.get('weeks_to_complete')  # Length

    # Dates are ISO strings from Algolia; reformat for the CSV export.
    if start_date := advertised_course_run.get('start'):
        start_date = parser.parse(start_date).strftime(DATE_FORMAT)

    if end_date := advertised_course_run.get('end'):
        end_date = parser.parse(end_date).strftime(DATE_FORMAT)

    # enroll_by is a Unix timestamp (upgrade_deadline is deprecated in its favor).
    if enroll_by := advertised_course_run.get('enroll_by'):
        enroll_by = datetime.datetime.fromtimestamp(enroll_by).strftime(DATE_FORMAT)

    # Truncate the price to a whole number for display.
    content_price = None
    if content_price := advertised_course_run.get('content_price'):
        content_price = math.trunc(float(content_price))

    return {
        'title': title,
        'partner_name': partner_name,
        'start_date': start_date,
        'end_date': end_date,
        'enroll_by': enroll_by,
        'aggregation_key': aggregation_key,
        'advertised_course_run_key': advertised_course_run_key,
        'language': language,
        'transcript_languages': transcript_languages,
        'marketing_url': marketing_url,
        'short_description': short_description,
        'subjects': subjects,
        'skills': skills,
        'min_effort': min_effort,
        'max_effort': max_effort,
        'weeks_to_complete': weeks_to_complete,
        'outcome': outcome,
        'advertised_course_run': advertised_course_run,
        'content_price': content_price,
    }

csv_row.append(hit.get('language'))
csv_row.append(', '.join(hit.get('transcript_languages', [])))
csv_row.append(hit.get('marketing_url'))
csv_row.append(strip_tags(hit.get('short_description', '')))

csv_row.append(', '.join(hit.get('subjects', [])))
csv_row.append(key)
csv_row.append(hit.get('aggregation_key'))
def course_hit_to_row(hit):
"""
Helper function to construct a CSV row according to a single Algolia result course hit.
"""
row_data = _base_csv_row_data(hit)
csv_row = []
csv_row.append(row_data.get('title'))
csv_row.append(row_data.get('partner_name'))

skills = [skill['name'] for skill in hit.get('skills', [])]
csv_row.append(', '.join(skills))
advertised_course_run = row_data.get('advertised_course_run')

advertised_course_run = hit.get('advertised_course_run', {})
csv_row.append(advertised_course_run.get('min_effort'))
csv_row.append(advertised_course_run.get('max_effort'))
csv_row.append(advertised_course_run.get('weeks_to_complete')) # Length
csv_row.append(row_data.get('start_date'))
csv_row.append(row_data.get('end_date'))

csv_row.append(strip_tags(hit.get('outcome', ''))) # What You’ll Learn
# upgrade_deadline deprecated in favor of enroll_by
if upgrade_deadline := advertised_course_run.get('upgrade_deadline'):
upgrade_deadline = datetime.datetime.fromtimestamp(upgrade_deadline).strftime(DATE_FORMAT)
csv_row.append(upgrade_deadline)
csv_row.append(row_data.get('enroll_by'))
csv_row.append(', '.join(hit.get('programs', [])))
csv_row.append(', '.join(hit.get('program_titles', [])))

pacing_type = advertised_course_run.get('pacing_type')
csv_row.append(pacing_type)

csv_row.append(hit.get('level_type'))
csv_row.append(row_data.get('content_price'))
csv_row.append(row_data.get('language'))
csv_row.append(row_data.get('transcript_languages'))
csv_row.append(row_data.get('marketing_url'))
csv_row.append(row_data.get('short_description'))
csv_row.append(row_data.get('subjects'))
csv_row.append(row_data.get('advertised_course_run_key'))
csv_row.append(row_data.get('aggregation_key'))
csv_row.append(row_data.get('skills'))
csv_row.append(row_data.get('min_effort'))
csv_row.append(row_data.get('max_effort'))
csv_row.append(row_data.get('weeks_to_complete'))
csv_row.append(row_data.get('outcome'))

csv_row.append(strip_tags(hit.get('prerequisites_raw', ''))) # Pre-requisites

Expand All @@ -242,75 +275,32 @@ def course_hit_to_row(hit):
return csv_row


def fetch_and_format_registration_date(obj):
    """
    Return the ``registration_deadline`` of ``obj`` reformatted as MM-DD-YYYY.

    Args:
        obj (dict): mapping that may contain a ``registration_deadline``
            ISO-like string (``YYYY-MM-DDT...``).

    Returns:
        str or None: the reformatted date, or None when the deadline is
        absent/empty or not in ``YYYY-MM-DD`` order.
    """
    enroll_by_date = obj.get('registration_deadline')
    # Guard: a missing/None deadline previously raised AttributeError on .split().
    if not enroll_by_date:
        return None
    # Drop the time portion; only the date part is exported.
    stripped_enroll_by = enroll_by_date.split("T")[0]
    formatted_enroll_by = None
    try:
        enroll_by_datetime_obj = datetime.datetime.strptime(stripped_enroll_by, '%Y-%m-%d')
        formatted_enroll_by = enroll_by_datetime_obj.strftime('%m-%d-%Y')
    except ValueError as exc:
        logger.info(f"Unable to format registration deadline, failed with error: {exc}")
    return formatted_enroll_by


def exec_ed_course_to_row(hit):
    """
    Helper function to construct a CSV row according to a single executive
    education course hit.

    Args:
        hit (dict): a single Algolia search result hit for an exec ed course.

    Returns:
        list: CSV cell values in export column order.
    """
    row_data = _base_csv_row_data(hit)
    csv_row = []
    csv_row.append(row_data.get('title'))
    # Fix: _base_csv_row_data exposes the partner under 'partner_name';
    # the previous key ('partners') does not exist, so the column was always None.
    csv_row.append(row_data.get('partner_name'))

    csv_row.append(row_data.get('start_date'))
    csv_row.append(row_data.get('end_date'))
    csv_row.append(row_data.get('enroll_by'))

    csv_row.append(row_data.get('content_price'))
    csv_row.append(row_data.get('language'))
    csv_row.append(row_data.get('transcript_languages'))
    csv_row.append(row_data.get('marketing_url'))
    csv_row.append(row_data.get('short_description'))
    csv_row.append(row_data.get('subjects'))
    csv_row.append(row_data.get('advertised_course_run_key'))
    csv_row.append(row_data.get('aggregation_key'))
    csv_row.append(row_data.get('skills'))
    csv_row.append(row_data.get('min_effort'))
    csv_row.append(row_data.get('max_effort'))
    csv_row.append(row_data.get('weeks_to_complete'))
    csv_row.append(row_data.get('outcome'))
    csv_row.append(strip_tags(hit.get('full_description', '')))

    return csv_row
Expand Down
13 changes: 0 additions & 13 deletions enterprise_catalog/apps/api/v1/tests/test_export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,3 @@ def test_retrieve_available_fields(self):
"""
# assert that ALGOLIA_ATTRIBUTES_TO_RETRIEVE is a SUBSET of ALGOLIA_FIELDS
assert set(export_utils.ALGOLIA_ATTRIBUTES_TO_RETRIEVE) <= set(algolia_utils.ALGOLIA_FIELDS)

def test_fetch_and_format_registration_date(self):
    """
    Test the export properly fetches executive education registration dates
    """
    # An ISO-ordered deadline from Algolia is reformatted for the CSV download.
    well_formed = {'registration_deadline': '2002-02-15T12:12:200'}
    assert export_utils.fetch_and_format_registration_date(well_formed) == '02-15-2002'
    # Any other date ordering cannot be parsed and should yield None.
    malformed = {'registration_deadline': '02-15-2015T12:12:200'}
    assert export_utils.fetch_and_format_registration_date(malformed) is None
1 change: 1 addition & 0 deletions enterprise_catalog/apps/api/v1/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -958,6 +958,7 @@ class EnterpriseCatalogWorkbookViewTests(APITestMixin):
"weeks_to_complete": 6,
"upgrade_deadline": 32503680000.0,
"enroll_by": 32503680000.0,
"content_price": 2843.00
},
"course_runs": [

Expand Down
21 changes: 6 additions & 15 deletions enterprise_catalog/apps/catalog/algolia_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def course_run_not_active_checker():
return not is_course_run_active(advertised_course_run)

def deadline_passed_checker():
return _has_enroll_by_deadline_passed(course_json_metadata, advertised_course_run)
return _has_enroll_by_deadline_passed(course_json_metadata)

for should_not_index_function, log_message in (
(no_advertised_course_run_checker, 'no advertised course run'),
Expand All @@ -243,22 +243,13 @@ def deadline_passed_checker():
return True


def _has_enroll_by_deadline_passed(course_json_metadata):
    """
    Helper to determine if the enrollment deadline has passed for the given course,
    based on normalized_metadata's enroll_by_date.

    Returns:
        bool: True when an enroll-by date exists and is in the past;
        False when it is in the future or not recorded at all.
    """
    # Guard against a missing/None normalized_metadata or enroll_by_date:
    # chained subscripting raised TypeError/KeyError for records without them.
    normalized_metadata = course_json_metadata.get('normalized_metadata') or {}
    enroll_by_deadline = normalized_metadata.get('enroll_by_date')
    if not enroll_by_deadline:
        # No deadline recorded — treat as "not passed" so the course stays indexable.
        return False
    enroll_by_deadline_timestamp = parse_datetime(enroll_by_deadline).timestamp()
    return enroll_by_deadline_timestamp < localized_utcnow().timestamp()


Expand Down
Loading
Loading