Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: programs ONLY inherit UUIDs for catalogs common to ALL content within. #688

Merged
merged 1 commit into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 37 additions & 8 deletions enterprise_catalog/apps/api/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from celery import shared_task, states
from celery.exceptions import Ignore
from celery_utils.logged_task import LoggedTask
from django.conf import settings
from django.db import IntegrityError
from django.db.models import Prefetch, Q
from django.db.utils import OperationalError
Expand Down Expand Up @@ -676,6 +677,7 @@ def add_metadata_to_algolia_objects(
_add_in_algolia_products_by_object_id(algolia_products_by_object_id, batched_metadata)


# pylint: disable=too-many-statements
def _get_algolia_products_for_batch(
batch_num,
content_keys_batch,
Expand Down Expand Up @@ -727,6 +729,9 @@ def _get_algolia_products_for_batch(
customer_uuids_by_key = defaultdict(set)
catalog_queries_by_key = defaultdict(set)

catalog_query_uuid_by_catalog_uuid = defaultdict(set)
customer_uuid_by_catalog_uuid = defaultdict(set)

# Create a shared convenience queryset to prefetch catalogs for all metadata lookups below.
all_catalog_queries = CatalogQuery.objects.prefetch_related('enterprise_catalogs')

Expand Down Expand Up @@ -789,18 +794,42 @@ def _get_algolia_products_for_batch(
for catalog in associated_catalogs:
catalog_uuids_by_key[content_key].add(str(catalog.uuid))
customer_uuids_by_key[content_key].add(str(catalog.enterprise_uuid))
# Cache UUIDs related to each catalog.
catalog_query_uuid_by_catalog_uuid[str(catalog.uuid)] = (str(catalog_query.uuid), catalog_query.title)
customer_uuid_by_catalog_uuid[str(catalog.uuid)] = str(catalog.enterprise_uuid)

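# Second pass. This time the goal is to capture indirect relationships on programs:
# * For each program:
# - If settings.ENABLE_ENT_7729_ONLY_SHOW_COMPLETE_PROGRAMS is on, absorb only the UUIDs of catalogs common to
# ALL associated courses, plus the catalog query and customer UUIDs cached for those catalogs.
# - Otherwise (deprecated behavior), absorb all UUIDs associated with every associated course.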
for metadata in content_metadata_to_process:
if metadata.content_type != PROGRAM:
continue
program_content_key = metadata.content_key
for metadata in program_to_courses_mapping[program_content_key]:
catalog_queries_by_key[program_content_key].update(catalog_queries_by_key[metadata.content_key])
catalog_uuids_by_key[program_content_key].update(catalog_uuids_by_key[metadata.content_key])
customer_uuids_by_key[program_content_key].update(customer_uuids_by_key[metadata.content_key])
if settings.ENABLE_ENT_7729_ONLY_SHOW_COMPLETE_PROGRAMS:
for program_metadata in content_metadata_to_process:
if program_metadata.content_type != PROGRAM:
continue
program_content_key = program_metadata.content_key
catalog_uuids_for_courses_of_program = [
catalog_uuids_by_key[course_metadata.content_key]
for course_metadata in program_to_courses_mapping[program_content_key]
]
common_catalogs = set()
if catalog_uuids_for_courses_of_program:
common_catalogs = set.intersection(*catalog_uuids_for_courses_of_program)
for course_metadata in program_to_courses_mapping[program_content_key]:
catalog_queries_by_key[program_content_key].update(
catalog_query_uuid_by_catalog_uuid[catalog_uuid] for catalog_uuid in common_catalogs
)
catalog_uuids_by_key[program_content_key].update(common_catalogs)
customer_uuids_by_key[program_content_key].update(
customer_uuid_by_catalog_uuid[catalog_uuid] for catalog_uuid in common_catalogs
)
else: # Old deprecated code in this else block. Remove as part of ENT-7729.
for metadata in content_metadata_to_process:
if metadata.content_type != PROGRAM:
continue
program_content_key = metadata.content_key
for metadata in program_to_courses_mapping[program_content_key]:
catalog_queries_by_key[program_content_key].update(catalog_queries_by_key[metadata.content_key])
catalog_uuids_by_key[program_content_key].update(catalog_uuids_by_key[metadata.content_key])
customer_uuids_by_key[program_content_key].update(customer_uuids_by_key[metadata.content_key])

# Third pass. This time the goal is to capture indirect relationships on pathways:
# * For each pathway:
Expand Down
182 changes: 182 additions & 0 deletions enterprise_catalog/apps/api/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,188 @@ def _set_up_factory_data_for_algolia(self):
'course_metadata_unpublished': self.course_metadata_unpublished,
}

@mock.patch('django.conf.settings.ENABLE_ENT_7729_ONLY_SHOW_COMPLETE_PROGRAMS', True)
@mock.patch('enterprise_catalog.apps.api.tasks.get_initialized_algolia_client', return_value=mock.MagicMock())
def test_index_algolia_partial_program(self, mock_search_client):
    """
    With ENABLE_ENT_7729_ONLY_SHOW_COMPLETE_PROGRAMS patched to True, a program made of several courses must only
    be indexed with the catalog/customer/catalog query UUIDs shared by every one of its courses.

    This DAG represents the complete test environment:
    ┌────────────┐┌────────────┐┌────────────┐
    │*course-1   ││*course-2   ││*course-3   │
    │------------││------------││------------│
    │in catalog-1││            ││            │
    │in catalog-2││in catalog-2││            │
    │in catalog-3││in catalog-3││in catalog-3│
    │            ││in catalog-4││in catalog-4│
    │            ││            ││in catalog-5│
    └┬───────────┘└┬───────────┘└┬───────────┘
    ┌▽─────────────▽─────────────▽───────────┐
    │*program-1                              │
    │----------------------------------------│
    │(should inherit catalog-3 only)         │
    └────────────────────────────────────────┘
    * = indexable
    """
    program = ContentMetadataFactory(content_type=PROGRAM, content_key='program-1')
    courses = [
        ContentMetadataFactory(content_type=COURSE, content_key=f'test-course-{number}')
        for number in (1, 2, 3)
    ]

    # Every course is a member of the single program.
    for course in courses:
        course.associated_content_metadata.set([program])

    # Five catalog queries, each one backing exactly one enterprise catalog.
    catalog_queries = [CatalogQueryFactory(uuid=uuid.uuid4()) for _ in range(5)]
    catalogs = [EnterpriseCatalogFactory(catalog_query=catalog_query) for catalog_query in catalog_queries]

    # Slide a window of three queries across the courses: [0:3], [1:4], [2:5].
    # Only the query at index 2 ends up attached to all three courses.
    for start, course in enumerate(courses):
        course.catalog_queries.set(catalog_queries[start:start + 3])
    for course in courses:
        course.save()

    captured_products = []

    def capture_products(products_iterable):
        # Stand-in for `replace_all_objects`: slice assignment drains the generator and replaces whatever an
        # earlier call captured, so the list always holds the products of the most recent call.
        captured_products[:] = products_iterable

    mock_search_client().replace_all_objects.side_effect = capture_products

    with mock.patch('enterprise_catalog.apps.api.tasks.ALGOLIA_FIELDS', self.ALGOLIA_FIELDS), \
            self.assertLogs(level='INFO') as info_logs:
        tasks.index_enterprise_catalog_in_algolia_task()  # pylint: disable=no-value-for-parameter

    products_found_lines = [line for line in info_logs.output if ' products found.' in line]
    assert ' 15 products found.' in products_found_lines[0]

    # The program may only carry the UUIDs belonging to the one catalog common to all of its courses.
    program_uuid = program.json_metadata.get('uuid')
    shared_catalog = catalogs[2]
    shared_query = catalog_queries[2]
    expected_program_objects = sorted(
        [
            {
                'objectID': f'program-{program_uuid}-catalog-uuids-0',
                'enterprise_catalog_uuids': [str(shared_catalog.uuid)],
            },
            {
                'objectID': f'program-{program_uuid}-customer-uuids-0',
                'enterprise_customer_uuids': [str(shared_catalog.enterprise_uuid)],
            },
            {
                'objectID': f'program-{program_uuid}-catalog-query-uuids-0',
                'enterprise_catalog_query_uuids': [str(shared_query.uuid)],
                'enterprise_catalog_query_titles': [shared_query.title],
            },
        ],
        key=itemgetter('objectID'),
    )

    # Compare against only those sent products whose objectID mentions the program.
    actual_program_objects = sorted(
        (product for product in captured_products if program_uuid in product['objectID']),
        key=itemgetter('objectID'),
    )
    assert expected_program_objects == actual_program_objects

@mock.patch('django.conf.settings.ENABLE_ENT_7729_ONLY_SHOW_COMPLETE_PROGRAMS', False)
@mock.patch('enterprise_catalog.apps.api.tasks.get_initialized_algolia_client', return_value=mock.MagicMock())
def test_index_algolia_partial_program_disabled(self, mock_search_client):
    """
    With ENABLE_ENT_7729_ONLY_SHOW_COMPLETE_PROGRAMS patched to False, a program made of several courses is
    indexed with the UUIDs of every one of its courses. This is the old behavior prior to ENT-7729. Remove this
    unit test as part of that ticket.

    This DAG represents the complete test environment:
    ┌────────────┐┌────────────┐┌────────────┐
    │*course-1   ││*course-2   ││*course-3   │
    │------------││------------││------------│
    │in catalog-1││            ││            │
    │in catalog-2││in catalog-2││            │
    │in catalog-3││in catalog-3││in catalog-3│
    │            ││in catalog-4││in catalog-4│
    │            ││            ││in catalog-5│
    └┬───────────┘└┬───────────┘└┬───────────┘
    ┌▽─────────────▽─────────────▽───────────┐
    │*program-1                              │
    │----------------------------------------│
    │(should inherit all catalogs)           │
    └────────────────────────────────────────┘
    * = indexable
    """
    program = ContentMetadataFactory(content_type=PROGRAM, content_key='program-1')
    courses = [
        ContentMetadataFactory(content_type=COURSE, content_key=f'test-course-{number}')
        for number in (1, 2, 3)
    ]

    # Every course is a member of the single program.
    for course in courses:
        course.associated_content_metadata.set([program])

    # Five catalog queries, each one backing exactly one enterprise catalog.
    catalog_queries = [CatalogQueryFactory(uuid=uuid.uuid4()) for _ in range(5)]
    catalogs = [EnterpriseCatalogFactory(catalog_query=catalog_query) for catalog_query in catalog_queries]

    # Slide a window of three queries across the courses: [0:3], [1:4], [2:5].
    # Between them the three courses cover all five queries.
    for start, course in enumerate(courses):
        course.catalog_queries.set(catalog_queries[start:start + 3])
    for course in courses:
        course.save()

    captured_products = []

    def capture_products(products_iterable):
        # Stand-in for `replace_all_objects`: slice assignment drains the generator and replaces whatever an
        # earlier call captured, so the list always holds the products of the most recent call.
        captured_products[:] = products_iterable

    mock_search_client().replace_all_objects.side_effect = capture_products

    with mock.patch('enterprise_catalog.apps.api.tasks.ALGOLIA_FIELDS', self.ALGOLIA_FIELDS), \
            self.assertLogs(level='INFO') as info_logs:
        tasks.index_enterprise_catalog_in_algolia_task()  # pylint: disable=no-value-for-parameter

    products_found_lines = [line for line in info_logs.output if ' products found.' in line]
    assert ' 15 products found.' in products_found_lines[0]

    # The program is expected to carry the (sorted) UUIDs and titles of all five catalogs/queries.
    program_uuid = program.json_metadata.get('uuid')
    expected_program_objects = sorted(
        [
            {
                'objectID': f'program-{program_uuid}-catalog-uuids-0',
                'enterprise_catalog_uuids': sorted(str(catalog.uuid) for catalog in catalogs),
            },
            {
                'objectID': f'program-{program_uuid}-customer-uuids-0',
                'enterprise_customer_uuids': sorted(str(catalog.enterprise_uuid) for catalog in catalogs),
            },
            {
                'objectID': f'program-{program_uuid}-catalog-query-uuids-0',
                'enterprise_catalog_query_uuids': sorted(str(query.uuid) for query in catalog_queries),
                'enterprise_catalog_query_titles': sorted(query.title for query in catalog_queries),
            },
        ],
        key=itemgetter('objectID'),
    )

    # Compare against only those sent products whose objectID mentions the program.
    actual_program_objects = sorted(
        (product for product in captured_products if program_uuid in product['objectID']),
        key=itemgetter('objectID'),
    )
    assert expected_program_objects == actual_program_objects

def test_index_content_keys_in_algolia(self):
"""
Test the _index_content_keys_in_algolia helper function to make sure it creates a generator to support batching
Expand Down
7 changes: 7 additions & 0 deletions enterprise_catalog/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,3 +429,10 @@
'VERSION': '1.0.0',
'SERVE_INCLUDE_SCHEMA': False,
}

# (ENT-7729) Feature flag for how programs are indexed in Algolia.
#
# When True, a program is only tagged with the catalog query/catalog/customer UUIDs that are common to ALL content
# within the program. This should have the outcome of only showing completely accessible programs in the catalog
# search page: https://enterprise.edx.org/<customer>/search
#
# When False, the deprecated behavior applies: a program inherits the UUIDs of every course it contains.
#
# Enable this on stage first.
ENABLE_ENT_7729_ONLY_SHOW_COMPLETE_PROGRAMS = False
Loading