diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py new file mode 100644 index 000000000000..7962f00f95d6 --- /dev/null +++ b/openedx/core/djangoapps/content/search/api.py @@ -0,0 +1,455 @@ +""" +Content index and search API using Meilisearch +""" +from __future__ import annotations + +import logging +import time +from contextlib import contextmanager +from datetime import datetime, timedelta, timezone +from functools import wraps +from typing import Callable, Generator + +from django.conf import settings +from django.contrib.auth import get_user_model +from django.core.cache import cache +from meilisearch import Client as MeilisearchClient +from meilisearch.errors import MeilisearchError +from meilisearch.models.task import TaskInfo +from opaque_keys.edx.keys import UsageKey + +from openedx.core.djangoapps.content.search.documents import ( + STUDIO_INDEX_NAME, + Fields, + meili_id_from_opaque_key, + searchable_doc_for_course_block, + searchable_doc_for_library_block +) +from openedx.core.djangoapps.content_libraries import api as lib_api +from xmodule.modulestore import ModuleStoreEnum +from xmodule.modulestore.django import modulestore + +from .documents import Fields, searchable_doc_for_course_block, searchable_doc_for_library_block + +log = logging.getLogger(__name__) + +User = get_user_model() + +STUDIO_INDEX_NAME = "studio_content" + +if hasattr(settings, "MEILISEARCH_INDEX_PREFIX"): + INDEX_NAME = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_NAME +else: + INDEX_NAME = STUDIO_INDEX_NAME + + +_MEILI_CLIENT = None +_MEILI_API_KEY_UID = None + +LOCK_EXPIRE = 5 * 60 # Lock expires in 5 minutes + + +@contextmanager +def _index_rebuild_lock() -> Generator[str, None, None]: + """ + Lock to prevent that more than one rebuild is running at the same time + """ + timeout_at = time.monotonic() + LOCK_EXPIRE + lock_id = f"lock-meilisearch-index-{INDEX_NAME}" + new_index_name = INDEX_NAME + "_new" + + while True: + 
status = cache.add(lock_id, new_index_name, LOCK_EXPIRE) + if status: + # Lock acquired + try: + yield new_index_name + break + finally: + # Release the lock + cache.delete(lock_id) + + if time.monotonic() > timeout_at: + raise TimeoutError("Timeout acquiring lock") + + time.sleep(1) + + +def _get_running_rebuild_index_name() -> str | None: + lock_id = f"lock-meilisearch-index-{INDEX_NAME}" + + return cache.get(lock_id) + + +def _get_meilisearch_client(): + """ + Get the Meiliesearch client + """ + global _MEILI_CLIENT # pylint: disable=global-statement + + # Connect to Meilisearch + if not is_meilisearch_enabled(): + raise RuntimeError("MEILISEARCH_ENABLED is not set - search functionality disabled.") + + if _MEILI_CLIENT is not None: + return _MEILI_CLIENT + + _MEILI_CLIENT = MeilisearchClient(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) + try: + _MEILI_CLIENT.health() + except MeilisearchError as err: + _MEILI_CLIENT = None + raise ConnectionError("Unable to connect to Meilisearch") from err + return _MEILI_CLIENT + + +def clear_meilisearch_client(): + global _MEILI_CLIENT # pylint: disable=global-statement + + _MEILI_CLIENT = None + + +def _get_meili_api_key_uid(): + """ + Helper method to get the UID of the API key we're using for Meilisearch + """ + global _MEILI_API_KEY_UID # pylint: disable=global-statement + + if _MEILI_API_KEY_UID is not None: + return _MEILI_API_KEY_UID + + _MEILI_API_KEY_UID = _get_meilisearch_client().get_key(settings.MEILISEARCH_API_KEY).uid + + +def _wait_for_meili_task(info: TaskInfo) -> None: + """ + Simple helper method to wait for a Meilisearch task to complete + """ + client = _get_meilisearch_client() + current_status = client.get_task(info.task_uid) + while current_status.status in ("enqueued", "processing"): + time.sleep(1) + current_status = client.get_task(info.task_uid) + if current_status.status != "succeeded": + try: + err_reason = current_status.error['message'] + except (TypeError, KeyError): + err_reason = 
"Unknown error" + raise MeilisearchError(err_reason) + + +def _wait_for_meili_tasks(info_list: list[TaskInfo]) -> None: + """ + Simple helper method to wait for multiple Meilisearch tasks to complete + """ + while info_list: + info = info_list.pop() + _wait_for_meili_task(info) + + +def _index_exists(index_name: str) -> bool: + """ + Check if an index exists + """ + client = _get_meilisearch_client() + try: + client.get_index(index_name) + except MeilisearchError as err: + if err.code == "index_not_found": + return False + else: + raise err + return True + + +@contextmanager +def _using_temp_index(status_cb: Callable[[str], None] | None = None) -> Generator[str, None, None]: + """ + Create a new temporary Meilisearch index, populate it, then swap it to + become the active index. + """ + def nop(_): # pragma: no cover + pass + + if status_cb is None: + status_cb = nop + + client = _get_meilisearch_client() + status_cb("Checking index...") + with _index_rebuild_lock() as temp_index_name: + if _index_exists(temp_index_name): + status_cb("Temporary index already exists. Deleting it...") + _wait_for_meili_task(client.delete_index(temp_index_name)) + + status_cb("Creating new index...") + _wait_for_meili_task( + client.create_index(temp_index_name, {'primaryKey': 'id'}) + ) + new_index_created = client.get_index(temp_index_name).created_at + + yield temp_index_name + + if not _index_exists(INDEX_NAME): + # We have to create the "target" index before we can successfully swap the new one into it: + status_cb("Preparing to swap into index (first time)...") + _wait_for_meili_task(client.create_index(INDEX_NAME)) + status_cb("Swapping index...") + client.swap_indexes([{'indexes': [temp_index_name, INDEX_NAME]}]) + # If we're using an API key that's restricted to certain index prefix(es), we won't be able to get the status + # of this request unfortunately. 
https://github.com/meilisearch/meilisearch/issues/4103 + while True: + time.sleep(1) + if client.get_index(INDEX_NAME).created_at != new_index_created: + status_cb("Waiting for swap completion...") + else: + break + status_cb("Deleting old index...") + _wait_for_meili_task(client.delete_index(temp_index_name)) + + +def _recurse_children(block, fn, status_cb: Callable[[str], None] | None = None) -> None: + """ + Recurse the children of an XBlock and call the given function for each + + The main purpose of this is just to wrap the loading of each child in + try...except. Otherwise block.get_children() would do what we need. + """ + if block.has_children: + for child_id in block.children: + try: + child = block.get_child(child_id) + except Exception as err: # pylint: disable=broad-except + log.exception(err) + if status_cb is not None: + status_cb(f"Unable to load block {child_id}") + else: + fn(child) + + +def only_if_meilisearch_enabled(f): + """ + Only call `f` if meilisearch is enabled + """ + @wraps(f) + def wrapper(*args, **kwargs): + """Wraps the decorated function.""" + if is_meilisearch_enabled(): + return f(*args, **kwargs) + return wrapper + + +def is_meilisearch_enabled() -> bool: + """ + Returns whether Meilisearch is enabled + """ + if hasattr(settings, "MEILISEARCH_INDEX_PREFIX"): + return settings.MEILISEARCH_ENABLED + + return False + + +def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None: + """ + Rebuild the Meilisearch index from scratch + """ + def nop(_message): + pass + + if status_cb is None: + status_cb = nop + + client = _get_meilisearch_client() + store = modulestore() + + # Get the lists of libraries + status_cb("Counting libraries...") + lib_keys = [lib.library_key for lib in lib_api.ContentLibrary.objects.select_related('org').only('org', 'slug')] + num_libraries = len(lib_keys) + + # Get the list of courses + status_cb("Counting courses...") + with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): + 
all_courses = store.get_courses() + num_courses = len(all_courses) + + # Some counters so we can track our progress as indexing progresses: + num_contexts = num_courses + num_libraries + num_contexts_done = 0 # How many courses/libraries we've indexed + num_blocks_done = 0 # How many individual components/XBlocks we've indexed + + status_cb(f"Found {num_courses} courses and {num_libraries} libraries.") + with _using_temp_index(status_cb) as temp_index_name: + ############## Configure the index ############## + + # The following index settings are best changed on an empty index. + # Changing them on a populated index will "re-index all documents in the index, which can take some time" + # and use more RAM. Instead, we configure an empty index then populate it one course/library at a time. + + # Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique): + client.index(temp_index_name).update_distinct_attribute(Fields.usage_key) + # Mark which attributes can be used for filtering/faceted search: + client.index(temp_index_name).update_filterable_attributes([ + Fields.block_type, + Fields.context_key, + Fields.org, + Fields.tags, + Fields.type, + ]) + + ############## Libraries ############## + status_cb("Indexing libraries...") + for lib_key in lib_keys: + status_cb(f"{num_contexts_done + 1}/{num_contexts}. 
Now indexing library {lib_key}") + docs = [] + for component in lib_api.get_library_components(lib_key): + metadata = lib_api.LibraryXBlockMetadata.from_component(lib_key, component) + doc = searchable_doc_for_library_block(metadata) + docs.append(doc) + num_blocks_done += 1 + if docs: + # Add all the docs in this library at once (usually faster than adding one at a time): + _wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) + num_contexts_done += 1 + + ############## Courses ############## + status_cb("Indexing courses...") + for course in all_courses: + status_cb( + f"{num_contexts_done + 1}/{num_contexts}. Now indexing course {course.display_name} ({course.id})" + ) + docs = [] + + # Pre-fetch the course with all of its children: + course = store.get_course(course.id, depth=None) + + def add_with_children(block): + """ Recursively index the given XBlock/component """ + doc = searchable_doc_for_course_block(block) + docs.append(doc) # pylint: disable=cell-var-from-loop + _recurse_children(block, add_with_children) # pylint: disable=cell-var-from-loop + + _recurse_children(course, add_with_children) + + if docs: + # Add all the docs in this course at once (usually faster than adding one at a time): + _wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) + num_contexts_done += 1 + num_blocks_done += len(docs) + + status_cb(f"Done! 
{num_blocks_done} blocks indexed across {num_contexts_done} courses and libraries.") + + +def upsert_xblock_index_doc( + usage_key: UsageKey, recursive: bool = True, update_metadata: bool = True, update_tags: bool = True +) -> None: + """ + Creates or updates the document for the given XBlock in the search index + + + Args: + usage_key (UsageKey): The usage key of the XBlock to index + recursive (bool): If True, also index all children of the XBlock + update_metadata (bool): If True, update the metadata of the XBlock + update_tags (bool): If True, update the tags of the XBlock + """ + current_rebuild_index_name = _get_running_rebuild_index_name() + + xblock = modulestore().get_item(usage_key) + client = _get_meilisearch_client() + + docs = [] + + def add_with_children(block): + """ Recursively index the given XBlock/component """ + doc = searchable_doc_for_course_block(block, include_metadata=update_metadata, include_tags=update_tags) + docs.append(doc) + if recursive: + _recurse_children(block, add_with_children) + + add_with_children(xblock) + + tasks = [] + if current_rebuild_index_name: + # If there is a rebuild in progress, the document will also be added to the new index. + tasks.append(client.index(current_rebuild_index_name).update_documents(docs)) + tasks.append(client.index(INDEX_NAME).update_documents(docs)) + + _wait_for_meili_tasks(tasks) + + +def delete_index_doc(usage_key: UsageKey) -> None: + """ + Deletes the document for the given XBlock from the search index + + Args: + usage_key (UsageKey): The usage key of the XBlock to be removed from the index + """ + current_rebuild_index_name = _get_running_rebuild_index_name() + + client = _get_meilisearch_client() + + tasks = [] + if current_rebuild_index_name: + # If there is a rebuild in progress, the document will also be added to the new index. 
+ tasks.append(client.index(current_rebuild_index_name).delete_document(meili_id_from_opaque_key(usage_key))) + tasks.append(client.index(INDEX_NAME).delete_document(meili_id_from_opaque_key(usage_key))) + + _wait_for_meili_tasks(tasks) + + +def upsert_library_block_index_doc( + usage_key: UsageKey, update_metadata: bool = True, update_tags: bool = True +) -> None: + """ + Creates or updates the document for the given Library Block in the search index + + + Args: + usage_key (UsageKey): The usage key of the Library Block to index + update_metadata (bool): If True, update the metadata of the Library Block + update_tags (bool): If True, update the tags of the Library Block + """ + current_rebuild_index_name = _get_running_rebuild_index_name() + + library_block = lib_api.get_component_from_usage_key(usage_key) + library_block_metadata = lib_api.LibraryXBlockMetadata.from_component(usage_key.context_key, library_block) + client = _get_meilisearch_client() + + docs = [ + searchable_doc_for_library_block( + library_block_metadata, include_metadata=update_metadata, include_tags=update_tags + ) + ] + + tasks = [] + if current_rebuild_index_name: + # If there is a rebuild in progress, the document will also be added to the new index. + tasks.append(client.index(current_rebuild_index_name).update_documents(docs)) + tasks.append(client.index(INDEX_NAME).update_documents(docs)) + + _wait_for_meili_tasks(tasks) + +def generate_user_token(user): + """ + Returns a Meilisearch API key that only allows the user to search content that they have permission to view + """ + expires_at = datetime.now(tz=timezone.utc) + timedelta(days=7) + search_rules = { + INDEX_NAME: { + # TODO: Apply filters here based on the user's permissions, so they can only search for content + # that they have permission to view. Example: + # 'filter': 'org = BradenX' + } + } + # Note: the following is just generating a JWT. It doesn't actually make an API call to Meilisearch. 
+ restricted_api_key = _get_meilisearch_client().generate_tenant_token( + api_key_uid=_get_meili_api_key_uid(), + search_rules=search_rules, + expires_at=expires_at, + ) + + return { + "url": settings.MEILISEARCH_PUBLIC_URL, + "index_name": INDEX_NAME, + "api_key": restricted_api_key, + } diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index fc1388f82455..f42bd5db9965 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -6,7 +6,7 @@ import logging from django.utils.text import slugify -from opaque_keys.edx.keys import UsageKey, LearningContextKey +from opaque_keys.edx.keys import LearningContextKey, UsageKey from openedx.core.djangoapps.content_libraries import api as lib_api from openedx.core.djangoapps.content_tagging import api as tagging_api @@ -62,7 +62,7 @@ class DocType: library_block = "library_block" -def _meili_id_from_opaque_key(usage_key: UsageKey) -> str: +def meili_id_from_opaque_key(usage_key: UsageKey) -> str: """ Meilisearch requires each document to have a primary key that's either an integer or a string composed of alphanumeric characters (a-z A-Z 0-9), @@ -88,7 +88,6 @@ class implementation returns only: {"content": {"display_name": "..."}, "content_type": "..."} """ block_data = { - Fields.id: _meili_id_from_opaque_key(block.usage_key), Fields.usage_key: str(block.usage_key), Fields.block_id: str(block.usage_key.block_id), Fields.display_name: xblock_api.get_block_display_name(block), @@ -196,33 +195,58 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict: return {Fields.tags: result} -def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> dict: +def searchable_doc_for_library_block( + xblock_metadata: lib_api.LibraryXBlockMetadata, include_metadata: bool = True, include_tags: bool = True +) -> dict: """ Generate a dictionary document suitable for 
ingestion into a search engine like Meilisearch or Elasticsearch, so that the given library block can be found using faceted search. + + Args: + xblock_metadata: The XBlock component metadata to index + include_metadata: If True, include the block's metadata in the doc + include_tags: If True, include the block's tags in the doc """ - library_name = lib_api.get_library(metadata.usage_key.context_key).title - doc = {} - try: - block = xblock_api.load_block(metadata.usage_key, user=None) - except Exception as err: # pylint: disable=broad-except - log.exception(f"Failed to load XBlock {metadata.usage_key}: {err}") - doc.update(_fields_from_block(block)) - doc.update(_tags_for_content_object(metadata.usage_key)) - doc[Fields.type] = DocType.library_block - # Add the breadcrumbs. In v2 libraries, the library itself is not a "parent" of the XBlocks so we add it here: - doc[Fields.breadcrumbs] = [{"display_name": library_name}] + library_name = lib_api.get_library(xblock_metadata.usage_key.context_key).title + block = xblock_api.load_block(xblock_metadata.usage_key, user=None) + + doc = { + Fields.id: meili_id_from_opaque_key(xblock_metadata.usage_key), + Fields.type: DocType.library_block, + } + + if include_metadata: + doc.update(_fields_from_block(block)) + # Add the breadcrumbs. In v2 libraries, the library itself is not a "parent" of the XBlocks so we add it here: + doc[Fields.breadcrumbs] = [{"display_name": library_name}] + + if include_tags: + doc.update(_tags_for_content_object(xblock_metadata.usage_key)) + return doc -def searchable_doc_for_course_block(block) -> dict: +def searchable_doc_for_course_block(block, include_metadata: bool = True, include_tags: bool = True) -> dict: """ Generate a dictionary document suitable for ingestion into a search engine like Meilisearch or Elasticsearch, so that the given course block can be found using faceted search. 
+ + Args: + block: The XBlock instance to index + include_metadata: If True, include the block's metadata in the doc + include_tags: If True, include the block's tags in the doc """ - doc = _fields_from_block(block) - doc.update(_tags_for_content_object(block.usage_key)) - doc[Fields.type] = DocType.course_block + doc = { + Fields.id: meili_id_from_opaque_key(block.usage_key), + Fields.type: DocType.course_block, + } + + if include_metadata: + doc.update(_fields_from_block(block)) + + if include_tags: + doc.update(_tags_for_content_object(block.usage_key)) + return doc diff --git a/openedx/core/djangoapps/content/search/handlers.py b/openedx/core/djangoapps/content/search/handlers.py index e69de29bb2d1..db13a09bc6c0 100644 --- a/openedx/core/djangoapps/content/search/handlers.py +++ b/openedx/core/djangoapps/content/search/handlers.py @@ -0,0 +1,106 @@ +""" +Handlers for content indexing +""" + +import logging + +from django.dispatch import receiver +from openedx_events.content_authoring.data import LibraryBlockData, XBlockData +from openedx_events.content_authoring.signals import ( + LIBRARY_BLOCK_CREATED, + LIBRARY_BLOCK_DELETED, + LIBRARY_BLOCK_UPDATED, + XBLOCK_CREATED, + XBLOCK_DELETED, + XBLOCK_UPDATED +) + +from .api import only_if_meilisearch_enabled +from .tasks import ( + delete_library_block_index_doc, + delete_xblock_index_doc, + upsert_library_block_index_doc, + upsert_xblock_index_doc +) + +log = logging.getLogger(__name__) + + +@receiver(XBLOCK_CREATED) +@only_if_meilisearch_enabled +def xblock_created_handler(**kwargs) -> None: + """ + Create the index for the XBlock + """ + xblock_info = kwargs.get("xblock_info", None) + if not xblock_info or not isinstance(xblock_info, XBlockData): # pragma: no cover + log.error("Received null or incorrect data for event") + return + + upsert_xblock_index_doc.delay( + str(xblock_info.usage_key), + recursive=False, + update_metadata=True, + update_tags=False, + ) + + +@receiver(XBLOCK_UPDATED) 
@only_if_meilisearch_enabled
def xblock_updated_handler(**kwargs) -> None:
    """
    Update the index for the XBlock and its children
    """
    xblock_info = kwargs.get("xblock_info", None)
    if not xblock_info or not isinstance(xblock_info, XBlockData):  # pragma: no cover
        log.error("Received null or incorrect data for event")
        return

    upsert_xblock_index_doc.delay(
        str(xblock_info.usage_key),
        recursive=True,  # Update all children because the breadcrumb may have changed
        update_metadata=True,
        update_tags=False,
    )


@receiver(XBLOCK_DELETED)
@only_if_meilisearch_enabled
def xblock_deleted_handler(**kwargs) -> None:
    """
    Delete the index for the XBlock
    """
    xblock_info = kwargs.get("xblock_info", None)
    if not xblock_info or not isinstance(xblock_info, XBlockData):  # pragma: no cover
        log.error("Received null or incorrect data for event")
        return

    delete_xblock_index_doc.delay(str(xblock_info.usage_key))


@receiver(LIBRARY_BLOCK_CREATED)
# BUG FIX: LIBRARY_BLOCK_UPDATED was imported but never connected, so edits to
# library blocks were never re-indexed. The handler's name and docstring
# ("Create or update") show it is meant to serve both signals.
@receiver(LIBRARY_BLOCK_UPDATED)
@only_if_meilisearch_enabled
def content_library_updated_handler(**kwargs) -> None:
    """
    Create or update the index for the content library block
    """
    library_block_data = kwargs.get("library_block", None)
    if not library_block_data or not isinstance(library_block_data, LibraryBlockData):  # pragma: no cover
        log.error("Received null or incorrect data for event")
        return

    upsert_library_block_index_doc.delay(str(library_block_data.usage_key), update_metadata=True, update_tags=False)


@receiver(LIBRARY_BLOCK_DELETED)
@only_if_meilisearch_enabled
def content_library_deleted_handler(**kwargs) -> None:
    """
    Delete the index for the content library block
    """
    library_block_data = kwargs.get("library_block", None)
    if not library_block_data or not isinstance(library_block_data, LibraryBlockData):  # pragma: no cover
        log.error("Received null or incorrect data for event")
        return

    delete_library_block_index_doc.delay(str(library_block_data.usage_key))
diff --git 
a/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py b/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py deleted file mode 100644 index 18c7681879cb..000000000000 --- a/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py +++ /dev/null @@ -1,105 +0,0 @@ -""" -Mixin for Django management commands that interact with Meilisearch -""" -from contextlib import contextmanager -import time - -from django.conf import settings -from django.core.management import CommandError -import meilisearch -from meilisearch.errors import MeilisearchError -from meilisearch.models.task import TaskInfo - - -class MeiliCommandMixin: - """ - Mixin for Django management commands that interact with Meilisearch - """ - def get_meilisearch_client(self): - """ - Get the Meiliesearch client - """ - if hasattr(self, "_meili_client"): - return self._meili_client - # Connect to Meilisearch - if not settings.MEILISEARCH_ENABLED: - raise CommandError("MEILISEARCH_ENABLED is not set - search functionality disabled.") - - self._meili_client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) - try: - self._meili_client.health() - except MeilisearchError as err: - self.stderr.write(err.message) # print this because 'raise...from...' 
doesn't print the details - raise CommandError("Unable to connect to Meilisearch") from err - return self._meili_client - - def wait_for_meili_task(self, info: TaskInfo): - """ - Simple helper method to wait for a Meilisearch task to complete - """ - client = self.get_meilisearch_client() - current_status = client.get_task(info.task_uid) - while current_status.status in ("enqueued", "processing"): - self.stdout.write("...") - time.sleep(1) - current_status = client.get_task(info.task_uid) - if current_status.status != "succeeded": - self.stderr.write(f"Task has status: {current_status.status}") - self.stderr.write(str(current_status.error)) - try: - err_reason = current_status.error['message'] - except (TypeError, KeyError): - err_reason = "Unknown error" - raise MeilisearchError(err_reason) - - def index_exists(self, index_name: str) -> bool: - """ - Check if an index exists - """ - client = self.get_meilisearch_client() - try: - client.get_index(index_name) - except MeilisearchError as err: - if err.code == "index_not_found": - return False - else: - raise err - return True - - @contextmanager - def using_temp_index(self, target_index): - """ - Create a new temporary Meilisearch index, populate it, then swap it to - become the active index. - """ - client = self.get_meilisearch_client() - self.stdout.write("Checking index...") - temp_index_name = target_index + "_new" - if self.index_exists(temp_index_name): - self.stdout.write("Temporary index already exists. 
Deleting it...") - self.wait_for_meili_task(client.delete_index(temp_index_name)) - - self.stdout.write("Creating new index...") - self.wait_for_meili_task( - client.create_index(temp_index_name, {'primaryKey': 'id'}) - ) - new_index_created = client.get_index(temp_index_name).created_at - - yield temp_index_name - - if not self.index_exists(target_index): - # We have to create the "target" index before we can successfully swap the new one into it: - self.stdout.write("Preparing to swap into index (first time)...") - self.wait_for_meili_task(client.create_index(target_index)) - self.stdout.write("Swapping index...") - client.swap_indexes([{'indexes': [temp_index_name, target_index]}]) - # If we're using an API key that's restricted to certain index prefix(es), we won't be able to get the status - # of this request unfortunately. https://github.com/meilisearch/meilisearch/issues/4103 - while True: - time.sleep(1) - if client.get_index(target_index).created_at != new_index_created: - self.stdout.write("Waiting for swap completion...") - else: - break - self.stdout.write("Deleting old index...") - self.wait_for_meili_task(client.delete_index(temp_index_name)) diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py index 0f52eea51d4d..3767ebcba6c9 100644 --- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -5,28 +5,12 @@ See also cms/djangoapps/contentstore/management/commands/reindex_course.py which indexes LMS (published) courses in ElasticSearch. 
""" -import logging -import time - -from django.conf import settings from django.core.management import BaseCommand, CommandError -from openedx.core.djangoapps.content_libraries import api as lib_api -from openedx.core.djangoapps.content.search.documents import ( - Fields, - searchable_doc_for_course_block, - searchable_doc_for_library_block, - STUDIO_INDEX_NAME, -) -from xmodule.modulestore import ModuleStoreEnum -from xmodule.modulestore.django import modulestore -from .meili_mixin import MeiliCommandMixin - - -log = logging.getLogger(__name__) +from ... import api -class Command(MeiliCommandMixin, BaseCommand): +class Command(BaseCommand): """ Build or re-build the search index for courses and libraries (in Studio, i.e. Draft mode) @@ -41,119 +25,13 @@ def handle(self, *args, **options): """ Build a new search index for Studio, containing content from courses and libraries """ + if not api.is_meilisearch_enabled(): + raise CommandError("Meilisearch is not enabled. Please set MEILISEARCH_ENABLED to True in your settings.") + if not options["experimental"]: raise CommandError( "This command is experimental and not recommended for production. " "Use the --experimental argument to acknowledge and run it." 
) - start_time = time.perf_counter() - client = self.get_meilisearch_client() - store = modulestore() - - # Get the lists of libraries - self.stdout.write("Counting libraries...") - lib_keys = [lib.library_key for lib in lib_api.ContentLibrary.objects.select_related('org').only('org', 'slug')] - num_libraries = len(lib_keys) - - # Get the list of courses - self.stdout.write("Counting courses...") - with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): - all_courses = store.get_courses() - num_courses = len(all_courses) - - # Some counters so we can track our progress as indexing progresses: - num_contexts = num_courses + num_libraries - num_contexts_done = 0 # How many courses/libraries we've indexed - num_blocks_done = 0 # How many individual components/XBlocks we've indexed - - self.stdout.write(f"Found {num_courses} courses and {num_libraries} libraries.") - index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_NAME - with self.using_temp_index(index_name) as temp_index_name: - ############## Configure the index ############## - - # The following index settings are best changed on an empty index. - # Changing them on a populated index will "re-index all documents in the index, which can take some time" - # and use more RAM. Instead, we configure an empty index then populate it one course/library at a time. 
- - # Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique): - client.index(temp_index_name).update_distinct_attribute(Fields.usage_key) - # Mark which attributes can be used for filtering/faceted search: - client.index(temp_index_name).update_filterable_attributes([ - Fields.block_type, - Fields.context_key, - Fields.org, - Fields.tags, - Fields.type, - ]) - # Mark which attributes are used for keyword search, in order of importance: - client.index(temp_index_name).update_searchable_attributes([ - Fields.display_name, - Fields.block_id, - Fields.content, - Fields.tags, - # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields. - ]) - ############## Libraries ############## - self.stdout.write("Indexing libraries...") - for lib_key in lib_keys: - self.stdout.write(f"{num_contexts_done + 1}/{num_contexts}. Now indexing library {lib_key}") - docs = [] - for component in lib_api.get_library_components(lib_key): - metadata = lib_api.LibraryXBlockMetadata.from_component(lib_key, component) - doc = searchable_doc_for_library_block(metadata) - docs.append(doc) - num_blocks_done += 1 - if docs: - # Add all the docs in this library at once (usually faster than adding one at a time): - self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) - self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) - num_contexts_done += 1 - - ############## Courses ############## - self.stdout.write("Indexing courses...") - for course in all_courses: - self.stdout.write( - f"{num_contexts_done + 1}/{num_contexts}. 
Now indexing course {course.display_name} ({course.id})" - ) - docs = [] - - # Pre-fetch the course with all of its children: - course = store.get_course(course.id, depth=None) - - def add_with_children(block): - """ Recursively index the given XBlock/component """ - doc = searchable_doc_for_course_block(block) - docs.append(doc) # pylint: disable=cell-var-from-loop - self.recurse_children(block, add_with_children) # pylint: disable=cell-var-from-loop - - self.recurse_children(course, add_with_children) - - if docs: - # Add all the docs in this course at once (usually faster than adding one at a time): - self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) - num_contexts_done += 1 - num_blocks_done += len(docs) - - elapsed_time = time.perf_counter() - start_time - self.stdout.write( - f"Done! {num_blocks_done} blocks indexed across {num_contexts_done} courses " - f"and libraries in {elapsed_time:.0f}s." - ) - - def recurse_children(self, block, fn): - """ - Recurse the children of an XBlock and call the given function for each - - The main purpose of this is just to wrap the loading of each child in - try...except. Otherwise block.get_children() would do what we need. 
- """ - if block.has_children: - for child_id in block.children: - try: - child = block.get_child(child_id) - except Exception as err: # pylint: disable=broad-except - log.exception(err) - self.stderr.write(f"Unable to load block {child_id}") - else: - fn(child) + api.rebuild_index(self.stdout.write) diff --git a/openedx/core/djangoapps/content/search/tasks.py b/openedx/core/djangoapps/content/search/tasks.py new file mode 100644 index 000000000000..52d3fd3ebfe5 --- /dev/null +++ b/openedx/core/djangoapps/content/search/tasks.py @@ -0,0 +1,93 @@ +""" +Defines asynchronous celery task for content indexing +""" + +from __future__ import annotations + +import logging + +from celery import shared_task +from celery_utils.logged_task import LoggedTask +from edx_django_utils.monitoring import set_code_owner_attribute +from opaque_keys.edx.keys import UsageKey +from opaque_keys.edx.locator import LibraryUsageLocatorV2 + +from . import api + +log = logging.getLogger(__name__) + + +@shared_task(base=LoggedTask) +@set_code_owner_attribute +def upsert_xblock_index_doc(usage_key_str: str, recursive: bool, update_metadata: bool, update_tags: bool) -> bool: + """ + Celery task to update the content index document for an XBlock + """ + try: + usage_key = UsageKey.from_string(usage_key_str) + + log.info("Updating content index document for XBlock with id: %s", usage_key) + + api.upsert_xblock_index_doc(usage_key, recursive, update_metadata, update_tags) + + return True + except Exception as e: # pylint: disable=broad-except pragma: no cover + log.error("Error updating content index document for XBlock with id: %s. 
%s", usage_key_str, e) + return False + + +@shared_task(base=LoggedTask) +@set_code_owner_attribute +def delete_xblock_index_doc(usage_key_str: str) -> bool: + """ + Celery task to delete the content index document for an XBlock + """ + try: + usage_key = UsageKey.from_string(usage_key_str) + + log.info("Updating content index document for XBlock with id: %s", usage_key) + + api.delete_index_doc(usage_key) + + return True + except Exception as e: # pylint: disable=broad-except pragma: no cover + log.error("Error deleting content index document for XBlock with id: %s. %s", usage_key_str, e) + return False + + +@shared_task(base=LoggedTask) +@set_code_owner_attribute +def upsert_library_block_index_doc(usage_key_str: str, update_metadata: bool, update_tags: bool) -> bool: + """ + Celery task to update the content index document for a library block + """ + try: + usage_key = LibraryUsageLocatorV2.from_string(usage_key_str) + + log.info("Updating content index document for library block with id: %s", usage_key) + + api.upsert_library_block_index_doc(usage_key, update_metadata, update_tags) + + return True + except Exception as e: # pylint: disable=broad-except pragma: no cover + log.error("Error updating content index document for libray block with id: %s. %s", usage_key_str, e) + return False + + +@shared_task(base=LoggedTask) +@set_code_owner_attribute +def delete_library_block_index_doc(usage_key_str: str) -> bool: + """ + Celery task to delete the content index document for a library block + """ + try: + usage_key = LibraryUsageLocatorV2.from_string(usage_key_str) + + log.info("Deleting content index document for library block with id: %s", usage_key) + + api.delete_index_doc(usage_key) + + return True + except Exception as e: # pylint: disable=broad-except pragma: no cover + log.error("Error deleting content index document for library block with id: %s. 
%s", usage_key_str, e) + return False diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py new file mode 100644 index 000000000000..92ec12db6bca --- /dev/null +++ b/openedx/core/djangoapps/content/search/tests/test_api.py @@ -0,0 +1,184 @@ +""" +Tests for the Studio content search API. +""" +from __future__ import annotations + +from unittest.mock import MagicMock, call, patch + +import ddt +from django.test import override_settings +from organizations.tests.factories import OrganizationFactory + +from common.djangoapps.student.tests.factories import UserFactory +from openedx.core.djangoapps.content_libraries import api as library_api +from openedx.core.djangolib.testing.utils import skip_unless_cms +from xmodule.modulestore.tests.django_utils import TEST_DATA_SPLIT_MODULESTORE, ModuleStoreTestCase + +from .. import api + +STUDIO_SEARCH_ENDPOINT_URL = "/api/content_search/v2/studio/" + + +@ddt.ddt +@skip_unless_cms +@patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", new=MagicMock(return_value=None)) +@patch("openedx.core.djangoapps.content.search.api.MeilisearchClient") +class TestSearchApi(ModuleStoreTestCase): + """ + Tests for the Studio content search and index API. 
+ """ + + MODULESTORE = TEST_DATA_SPLIT_MODULESTORE + + def setUp(self): + super().setUp() + self.user = UserFactory.create() + self.user_id = self.user.id + + self.modulestore_patcher = patch( + "openedx.core.djangoapps.content.search.api.modulestore", return_value=self.store + ) + self.addCleanup(self.modulestore_patcher.stop) + self.modulestore_patcher.start() + + # Clear the Meilisearch client to avoid side effects from other tests + api.clear_meilisearch_client() + + # Create course + self.course = self.store.create_course( + "org1", + "test_course", + "test_run", + self.user_id, + fields={"display_name": "Test Course"}, + ) + + # Create XBlocks + self.sequential = self.store.create_child(self.user_id, self.course.location, "sequential", "test_sequential") + self.doc_sequential = { + 'id': 'block-v1org1test_coursetest_runtypesequentialblocktest_sequential-f702c144', + 'type': 'course_block', + 'usage_key': 'block-v1:org1+test_course+test_run+type@sequential+block@test_sequential', + 'block_id': 'test_sequential', + 'display_name': 'sequential', + 'block_type': 'sequential', + 'context_key': 'course-v1:org1+test_course+test_run', + 'org': 'org1', + 'breadcrumbs': [{'display_name': 'Test Course'}], + 'content': {} + } + self.store.create_child(self.user_id, self.sequential.location, "vertical", "test_vertical") + self.doc_vertical = { + 'id': 'block-v1org1test_coursetest_runtypeverticalblocktest_vertical-e76a10a4', + 'type': 'course_block', + 'usage_key': 'block-v1:org1+test_course+test_run+type@vertical+block@test_vertical', + 'block_id': 'test_vertical', + 'display_name': 'vertical', + 'block_type': 'vertical', + 'context_key': 'course-v1:org1+test_course+test_run', + 'org': 'org1', + 'breadcrumbs': [ + {'display_name': 'Test Course'}, + {'display_name': 'sequential'} + ], + 'content': {} + } + + # Create a content library: + self.library = library_api.create_library( + library_type=library_api.COMPLEX, + org=OrganizationFactory.create(short_name="org1"), + 
slug="lib", + title="Library", + ) + # Populate it with a problem: + self.problem = library_api.create_library_block(self.library.key, "problem", "p1") + self.doc_problem = { + "id": "lborg1libproblemp1-a698218e", + "usage_key": "lb:org1:lib:problem:p1", + "block_id": "p1", + "display_name": "Blank Problem", + "block_type": "problem", + "context_key": "lib:org1:lib", + "org": "org1", + "breadcrumbs": [{"display_name": "Library"}], + "content": {"problem_types": [], "capa_content": " "}, + "type": "library_block", + } + + @override_settings(MEILISEARCH_ENABLED=False) + def test_reindex_meilisearch_disabled(self, mock_meilisearch): + with self.assertRaises(RuntimeError): + api.rebuild_index() + + mock_meilisearch.return_value.swap_indexes.assert_not_called() + + @override_settings(MEILISEARCH_ENABLED=True) + def test_reindex_meilisearch(self, mock_meilisearch): + + api.rebuild_index() + mock_meilisearch.return_value.index.return_value.add_documents.assert_has_calls( + [ + call([self.doc_sequential, self.doc_vertical]), + call([self.doc_problem]), + ], + any_order=True, + ) + + @ddt.data( + True, + False + ) + @override_settings(MEILISEARCH_ENABLED=True) + def test_index_xblock_metadata(self, recursive, mock_meilisearch): + """ + Test indexing an XBlock. + """ + api.upsert_xblock_index_doc( + self.sequential.usage_key, + recursive=recursive, + update_metadata=True, + update_tags=False, + ) + + if recursive: + expected_docs = [self.doc_sequential, self.doc_vertical] + else: + expected_docs = [self.doc_sequential] + + mock_meilisearch.return_value.index.return_value.update_documents.assert_called_once_with(expected_docs) + + @override_settings(MEILISEARCH_ENABLED=True) + def test_delete_index_xblock(self, mock_meilisearch): + """ + Test deleting an XBlock doc from the index. 
+ """ + api.delete_index_doc(self.sequential.usage_key) + + mock_meilisearch.return_value.index.return_value.delete_document.assert_called_once_with( + self.doc_sequential['id'] + ) + + @override_settings(MEILISEARCH_ENABLED=True) + def test_index_library_block_metadata(self, mock_meilisearch): + """ + Test indexing a Library Block. + """ + api.upsert_library_block_index_doc( + self.problem.usage_key, + update_metadata=True, + update_tags=False, + ) + + mock_meilisearch.return_value.index.return_value.update_documents.assert_called_once_with([self.doc_problem]) + + @override_settings(MEILISEARCH_ENABLED=True) + def test_delete_index_library_block(self, mock_meilisearch): + """ + Test deleting a Library Block doc from the index. + """ + api.delete_index_doc(self.problem.usage_key) + + mock_meilisearch.return_value.index.return_value.delete_document.assert_called_once_with( + self.doc_problem['id'] + ) diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py index 97b2fb5d033f..44a679422453 100644 --- a/openedx/core/djangoapps/content/search/tests/test_documents.py +++ b/openedx/core/djangoapps/content/search/tests/test_documents.py @@ -51,7 +51,7 @@ def setUpClass(cls): tagging_api.add_tag_to_taxonomy(cls.subject_tags, tag="Jump Links", parent_tag_value="Hypertext") # Tag stuff: - tagging_api.tag_object(cls.problem_block.usage_key, cls.difficulty_tags, tags=["Easy"]) + tagging_api.tag_object(cls.problem_block.usage_key, cls.difficulty_tags, ["Easy"]) tagging_api.tag_object(cls.html_block_key, cls.subject_tags, tags=["Chinese", "Jump Links"]) tagging_api.tag_object(cls.html_block_key, cls.difficulty_tags, tags=["Normal"]) diff --git a/openedx/core/djangoapps/content/search/tests/test_handlers.py b/openedx/core/djangoapps/content/search/tests/test_handlers.py new file mode 100644 index 000000000000..1fa3f5316abc --- /dev/null +++ 
b/openedx/core/djangoapps/content/search/tests/test_handlers.py @@ -0,0 +1,154 @@ +from unittest.mock import MagicMock, patch + +from organizations.tests.factories import OrganizationFactory +from django.test import override_settings, LiveServerTestCase + +from common.djangoapps.student.tests.factories import UserFactory +from openedx.core.djangoapps.content_libraries import api as library_api +from xmodule.modulestore.tests.django_utils import TEST_DATA_SPLIT_MODULESTORE, ModuleStoreTestCase +from openedx.core.lib.blockstore_api.tests.base import BlockstoreAppTestMixin + +from .. import api + + +@patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", new=MagicMock(return_value=None)) +@patch("openedx.core.djangoapps.content.search.api.MeilisearchClient") +@override_settings(MEILISEARCH_ENABLED=True) +class TestUpdateIndexHandlers( + ModuleStoreTestCase, + BlockstoreAppTestMixin, + LiveServerTestCase, +): + """ + Test that the search index is updated when XBlocks and Library Blocks are modified + """ + + MODULESTORE = TEST_DATA_SPLIT_MODULESTORE + + def setUp(self): + super().setUp() + # Create user + self.user = UserFactory.create() + self.user_id = self.user.id + + self.orgA = OrganizationFactory.create(short_name="orgA") + + self.patcher = patch("openedx.core.djangoapps.content_tagging.tasks.modulestore", return_value=self.store) + self.addCleanup(self.patcher.stop) + self.patcher.start() + + api.clear_meilisearch_client() # Clear the Meilisearch client to avoid leaking state from other tests + + + def test_create_delete_xblock(self, meilisearch_client): + # Create course + course = self.store.create_course( + self.orgA.short_name, + "test_course", + "test_run", + self.user_id, + fields={"display_name": "Test Course"}, + ) + + # Create XBlocks + sequential = self.store.create_child(self.user_id, course.location, "sequential", "test_sequential") + meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([ + { + 
'id': 'block-v1orgatest_coursetest_runtypesequentialblocktest_sequential-0cdb9395', + 'type': 'course_block', + 'usage_key': 'block-v1:orgA+test_course+test_run+type@sequential+block@test_sequential', + 'block_id': 'test_sequential', + 'display_name': 'sequential', + 'block_type': 'sequential', + 'context_key': 'course-v1:orgA+test_course+test_run', + 'org': 'orgA', + 'breadcrumbs': [{'display_name': 'Test Course'}], 'content': {} + } + ]) + vertical = self.store.create_child(self.user_id, sequential.location, "vertical", "test_vertical") + meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([ + { + 'id': 'block-v1orgatest_coursetest_runtypeverticalblocktest_vertical-011f143b', + 'type': 'course_block', + 'usage_key': 'block-v1:orgA+test_course+test_run+type@vertical+block@test_vertical', + 'block_id': 'test_vertical', + 'display_name': 'vertical', + 'block_type': 'vertical', + 'context_key': 'course-v1:orgA+test_course+test_run', + 'org': 'orgA', + 'breadcrumbs': [{'display_name': 'Test Course'}, {'display_name': 'sequential'}], + 'content': {} + } + ]) + + # Update the XBlock + sequential = self.store.get_item(sequential.location, self.user_id) # Refresh the XBlock + sequential.display_name = "Updated Sequential" + self.store.update_item(sequential, self.user_id) + + meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([ + { + 'id': 'block-v1orgatest_coursetest_runtypesequentialblocktest_sequential-0cdb9395', + 'type': 'course_block', + 'usage_key': 'block-v1:orgA+test_course+test_run+type@sequential+block@test_sequential', + 'block_id': 'test_sequential', + 'display_name': 'Updated Sequential', + 'block_type': 'sequential', + 'context_key': 'course-v1:orgA+test_course+test_run', + 'org': 'orgA', + 'breadcrumbs': [{'display_name': 'Test Course'}], 'content': {} + }, + { + 'id': 'block-v1orgatest_coursetest_runtypeverticalblocktest_vertical-011f143b', + 'type': 'course_block', + 'usage_key': 
'block-v1:orgA+test_course+test_run+type@vertical+block@test_vertical', + 'block_id': 'test_vertical', + 'display_name': 'vertical', + 'block_type': 'vertical', + 'context_key': 'course-v1:orgA+test_course+test_run', + 'org': 'orgA', + 'breadcrumbs': [{'display_name': 'Test Course'}, {'display_name': 'Updated Sequential'}], + 'content': {} + } + ]) + + # Delete the XBlock + self.store.delete_item(vertical.location, self.user_id) + + meilisearch_client.return_value.index.return_value.delete_document.assert_called_with( + 'block-v1orgatest_coursetest_runtypeverticalblocktest_vertical-011f143b' + ) + + + def test_create_delete_library_block(self, meilisearch_client): + # Create library + library = library_api.create_library( + org=self.orgA, + slug="lib_a", + title="Library Org A", + description="This is a library from Org A", + ) + + problem = library_api.create_library_block(library.key, "problem", "Problem1") + + meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([ + { + 'id': 'lborgalib_aproblemproblem1-ca3186e9', + 'type': 'library_block', + 'usage_key': 'lb:orgA:lib_a:problem:Problem1', + 'block_id': 'Problem1', + 'display_name': 'Blank Problem', + 'block_type': 'problem', + 'context_key': 'lib:orgA:lib_a', + 'org': 'orgA', + 'breadcrumbs': [{'display_name': 'Library Org A'}], + 'content': {'problem_types': [], 'capa_content': ' '} + }, + ]) + + # Delete the Library Block + library_api.delete_library_block(problem.usage_key) + + meilisearch_client.return_value.index.return_value.delete_document.assert_called_with( + 'lborgalib_aproblemproblem1-ca3186e9' + ) diff --git a/openedx/core/djangoapps/content/search/tests/test_views.py b/openedx/core/djangoapps/content/search/tests/test_views.py index e117cf1b969a..ccd255f84e6d 100644 --- a/openedx/core/djangoapps/content/search/tests/test_views.py +++ b/openedx/core/djangoapps/content/search/tests/test_views.py @@ -1,17 +1,24 @@ """ Tests for the Studio content search REST API. 
""" +from __future__ import annotations + +from unittest.mock import MagicMock, patch + from django.test import override_settings -from rest_framework.test import APITestCase, APIClient -from unittest import mock +from rest_framework.test import APIClient, APITestCase from common.djangoapps.student.tests.factories import UserFactory from openedx.core.djangolib.testing.utils import skip_unless_cms +from .. import api + STUDIO_SEARCH_ENDPOINT_URL = "/api/content_search/v2/studio/" @skip_unless_cms +@patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", new=MagicMock(return_value=None)) +@patch("openedx.core.djangoapps.content.search.api.MeilisearchClient") class StudioSearchViewTest(APITestCase): """ General tests for the Studio search REST API. @@ -31,8 +38,12 @@ def setUp(self): super().setUp() self.client = APIClient() + # Clear the Meilisearch client to avoid side effects from other tests + api.clear_meilisearch_client() + + @override_settings(MEILISEARCH_ENABLED=False) - def test_studio_search_unathenticated_disabled(self): + def test_studio_search_unathenticated_disabled(self, _meilisearch_client): """ Whether or not Meilisearch is enabled, the API endpoint requires authentication. """ @@ -40,7 +51,7 @@ def test_studio_search_unathenticated_disabled(self): assert result.status_code == 401 @override_settings(MEILISEARCH_ENABLED=True) - def test_studio_search_unathenticated_enabled(self): + def test_studio_search_unathenticated_enabled(self, _meilisearch_client): """ Whether or not Meilisearch is enabled, the API endpoint requires authentication. 
""" @@ -48,7 +59,7 @@ def test_studio_search_unathenticated_enabled(self): assert result.status_code == 401 @override_settings(MEILISEARCH_ENABLED=False) - def test_studio_search_disabled(self): + def test_studio_search_disabled(self, _meilisearch_client): """ When Meilisearch is disabled, the Studio search endpoint gives a 404 """ @@ -57,7 +68,7 @@ def test_studio_search_disabled(self): assert result.status_code == 404 @override_settings(MEILISEARCH_ENABLED=True) - def test_studio_search_student_forbidden(self): + def test_studio_search_student_forbidden(self, _meilisearch_client): """ Until we implement fine-grained permissions, only global staff can use the Studio search endpoint. @@ -67,14 +78,13 @@ def test_studio_search_student_forbidden(self): assert result.status_code == 403 @override_settings(MEILISEARCH_ENABLED=True) - @mock.patch('openedx.core.djangoapps.content.search.views._get_meili_api_key_uid') - def test_studio_search_staff(self, mock_get_api_key_uid): + def test_studio_search_staff(self, _meilisearch_client): """ Global staff can get a restricted API key for Meilisearch using the REST API. 
""" + _meilisearch_client.return_value.generate_tenant_token.return_value = "api_key" self.client.login(username='staff', password='staff_pass') - mock_get_api_key_uid.return_value = "3203d764-370f-4e99-a917-d47ab7f29739" result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) assert result.status_code == 200 assert result.data["index_name"] == "studio_content" diff --git a/openedx/core/djangoapps/content/search/views.py b/openedx/core/djangoapps/content/search/views.py index 69b686a3434f..b6be2cfb0c79 100644 --- a/openedx/core/djangoapps/content/search/views.py +++ b/openedx/core/djangoapps/content/search/views.py @@ -1,34 +1,22 @@ """ REST API for content search """ -from datetime import datetime, timedelta, timezone import logging -from django.conf import settings from django.contrib.auth import get_user_model -import meilisearch from rest_framework.exceptions import NotFound, PermissionDenied from rest_framework.response import Response from rest_framework.views import APIView from common.djangoapps.student.roles import GlobalStaff from openedx.core.lib.api.view_utils import view_auth_classes -from openedx.core.djangoapps.content.search.documents import STUDIO_INDEX_NAME + +from . 
import api User = get_user_model() log = logging.getLogger(__name__) -def _get_meili_api_key_uid(): - """ - Helper method to get the UID of the API key we're using for Meilisearch - """ - if not hasattr(_get_meili_api_key_uid, "uid"): - client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) - _get_meili_api_key_uid.uid = client.get_key(settings.MEILISEARCH_API_KEY).uid - return _get_meili_api_key_uid.uid - - @view_auth_classes(is_authenticated=True) class StudioSearchView(APIView): """ @@ -39,33 +27,13 @@ def get(self, request): """ Give user details on how they can search studio content """ - if not settings.MEILISEARCH_ENABLED: + if not api.is_meilisearch_enabled(): raise NotFound("Meilisearch features are not enabled.") if not GlobalStaff().has_user(request.user): # Until we enforce permissions properly (see below), this endpoint is restricted to global staff, # because it lets you search data from any course/library. raise PermissionDenied("For the moment, use of this search preview is restricted to global staff.") - client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) - index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_NAME - # Create an API key that only allows the user to search content that they have permission to view: - expires_at = datetime.now(tz=timezone.utc) + timedelta(days=7) - search_rules = { - index_name: { - # TODO: Apply filters here based on the user's permissions, so they can only search for content - # that they have permission to view. Example: - # 'filter': 'org = BradenX' - } - } - # Note: the following is just generating a JWT. It doesn't actually make an API call to Meilisearch. 
- restricted_api_key = client.generate_tenant_token( - api_key_uid=_get_meili_api_key_uid(), - search_rules=search_rules, - expires_at=expires_at, - ) + response_data = api.generate_user_token(request.user) - return Response({ - "url": settings.MEILISEARCH_PUBLIC_URL, - "index_name": index_name, - "api_key": restricted_api_key, - }) + return Response(response_data)