diff --git a/Pipfile b/Pipfile
index d5b1eeb5..0daf69e9 100644
--- a/Pipfile
+++ b/Pipfile
@@ -17,6 +17,7 @@ tomli = "*"
 pytest = "~=7.4"
 pytest-django = "~=3.10"
 pytest-rerunfailures = "~=12.0"
+requests-mock = "~=1.11"
 
 [packages]
 nodeenv = "==1.3.3"
diff --git a/Pipfile.lock b/Pipfile.lock
index 153202f7..d28be7a0 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "2fb4237dd4fb8a8f758ee395f171910347678442d57cc68b1b502aeeec09d8d1"
+            "sha256": "b9e6c6eef3b66e97de0663c14f43afed1285532f109cf9bd4ec505958d801fe7"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -844,6 +844,14 @@
             "markers": "python_version >= '3.7'",
             "version": "==2.31.0"
         },
+        "requests-mock": {
+            "hashes": [
+                "sha256:ef10b572b489a5f28e09b708697208c4a3b2b89ef80a9f01584340ea357ec3c4",
+                "sha256:f7fae383f228633f6bececebdab236c478ace2284d6292c6e7e2867b9ab74d15"
+            ],
+            "index": "pypi",
+            "version": "==1.11.0"
+        },
         "requests-toolbelt": {
             "hashes": [
                 "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6",
@@ -895,6 +903,14 @@
             "index": "pypi",
             "version": "==8.0.4"
         },
+        "six": {
+            "hashes": [
+                "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
+                "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+            "version": "==1.16.0"
+        },
         "toml": {
             "hashes": [
                 "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b",
diff --git a/kolibri_explore_plugin/collectionviews.py b/kolibri_explore_plugin/collectionviews.py
index 1b172a13..dc936924 100644
--- a/kolibri_explore_plugin/collectionviews.py
+++ b/kolibri_explore_plugin/collectionviews.py
@@ -82,12 +82,26 @@ def __init__(self):
         self.available = None
         super().__init__()
 
+    def get_latest_channels(self):
+        """Return a set of (channel_id, latest_version) tuples"""
+        return {
+            (channel_id, max(self.get_channel_versions(channel_id)))
+            for channel_id in self.get_channel_ids()
+        }
+
     def get_extra_channel_ids(self):
         all_channel_ids = _get_channel_ids_for_all_content_manifests(
             self.language
         )
         return all_channel_ids.difference(self.get_channel_ids())
 
+    def get_latest_extra_channels(self):
+        """Return (channel_id, latest_version) tuples for extra channels"""
+        all_channels = _get_latest_channels_for_all_content_manifests(
+            self.language
+        )
+        return all_channels.difference(self.get_latest_channels())
+
     def read_from_static_collection(
         self, grade, name, language, validate=False
     ):
@@ -134,20 +148,34 @@ def get_channelimport_tasks(self):
 
         For all the channels in this content manifest.
         """
-        return [
-            get_remotechannelimport_task(channel_id)
-            for channel_id in self.get_channel_ids()
-        ]
+        tasks = []
+        for channel_id, channel_version in self.get_latest_channels():
+            metadata = get_channel_metadata(channel_id)
+            if metadata and metadata.version >= channel_version:
+                logger.debug(
+                    f"Skipping channel import task for {channel_id} since "
+                    "already present"
+                )
+                continue
+            tasks.append(get_remotechannelimport_task(channel_id))
+        return tasks
 
     def get_extra_channelimport_tasks(self):
         """Return a serializable object to create extra channelimport tasks
 
         For all channels featured in Endless Key content manifests.
""" - return [ - get_remotechannelimport_task(channel_id) - for channel_id in self.get_extra_channel_ids() - ] + tasks = [] + for channel_id, channel_version in self.get_latest_extra_channels(): + metadata = get_channel_metadata(channel_id) + if metadata and metadata.version >= channel_version: + logger.debug( + f"Skipping extra channel import task for {channel_id} " + "since already present" + ) + continue + tasks.append(get_remotechannelimport_task(channel_id)) + return tasks def get_contentimport_tasks(self): """Return a serializable object to create contentimport tasks @@ -612,6 +640,17 @@ def _get_channel_ids_for_all_content_manifests(language): return channel_ids +def _get_latest_channels_for_all_content_manifests(language): + """Return set of all channel id and latest version tuples""" + channels = {} + for content_manifest in _content_manifests_by_language[language]: + for channel_id in content_manifest.get_channel_ids(): + version = max(content_manifest.get_channel_versions(channel_id)) + if version > channels.get(channel_id, -1): + channels[channel_id] = version + return set(channels.items()) + + @api_view(["GET"]) def get_collection_info(request): """Return the collection metadata and availability.""" diff --git a/kolibri_explore_plugin/jobs.py b/kolibri_explore_plugin/jobs.py index e384cd88..71d27077 100644 --- a/kolibri_explore_plugin/jobs.py +++ b/kolibri_explore_plugin/jobs.py @@ -30,7 +30,11 @@ class TaskType: def get_channel_metadata(channel_id): - return ChannelMetadata.objects.get(id=channel_id) + """Returns the ChannelMetadata object or None if it doesn't exist""" + try: + return ChannelMetadata.objects.get(id=channel_id) + except ChannelMetadata.DoesNotExist: + return None def get_applyexternaltags_task(node_id, tags): @@ -47,12 +51,11 @@ def get_remotechannelimport_task(channel_id, channel_name=None): if not channel_name: # Try to get the channel name from an existing channel database, # but this will fail on first import. 
-        try:
-            channel_metadata = get_channel_metadata(channel_id)
-        except ChannelMetadata.DoesNotExist:
-            channel_name = "unknown"
-        else:
+        channel_metadata = get_channel_metadata(channel_id)
+        if channel_metadata:
             channel_name = channel_metadata.name
+        else:
+            channel_name = "unknown"
     return {
         "task": TaskType.REMOTECHANNELIMPORT,
         "params": {
@@ -70,6 +73,8 @@ def get_remotecontentimport_task(
 ):
     if not channel_name:
         channel_metadata = get_channel_metadata(channel_id)
+        if not channel_metadata:
+            raise ValueError(f"Channel {channel_id} does not exist")
         channel_name = channel_metadata.name
     return {
         "task": TaskType.REMOTECONTENTIMPORT,
diff --git a/kolibri_explore_plugin/test/test_collectionviews.py b/kolibri_explore_plugin/test/test_collectionviews.py
index 23fc2620..5931c658 100644
--- a/kolibri_explore_plugin/test/test_collectionviews.py
+++ b/kolibri_explore_plugin/test/test_collectionviews.py
@@ -3,9 +3,12 @@
 from itertools import product
 
 import pytest
+import requests_mock
+from django.core.management import call_command
 from django.urls import reverse
 from kolibri.core.content.models import ChannelMetadata
 from kolibri.core.content.models import ContentNode
+from kolibri.core.content.models import LocalFile
 from rest_framework.test import APIClient
 
 from .utils import COLLECTIONSDIR
@@ -188,3 +191,45 @@ def test_download_manager_clean(facility_user, grade, name):
     assert ChannelMetadata.objects.count() == 2 * num_packs
     assert ContentNode.objects.filter().count() == 12 * num_packs
     assert ContentNode.objects.filter(available=True).count() == 8
+
+
+@pytest.mark.usefixtures("channel_import_db", "worker", "content_server")
+@pytest.mark.django_db
+@pytest.mark.parametrize(
+    ("grade", "name"),
+    product(
+        collectionviews.COLLECTION_GRADES, collectionviews.COLLECTION_NAMES
+    ),
+)
+def test_download_manager_preload(facility_user, grade, name):
+    """Test collection downloads with preloaded content"""
+    # Import the channels in full.
+    manifest = collectionviews._content_manifests_by_grade_name[grade][name]
+    all_channels = set(
+        list(manifest.get_channel_ids())
+        + list(manifest.get_extra_channel_ids())
+    )
+    for channel_id in all_channels:
+        call_command("importchannel", "network", channel_id)
+        call_command("importcontent", "--fail-on-error", "network", channel_id)
+
+    # Keep track of the number of channels and files downloaded. Make
+    # sure all channels and files have been downloaded.
+    num_initial_channels = ChannelMetadata.objects.count()
+    num_initial_files = LocalFile.objects.filter(available=True).count()
+    assert num_initial_channels == len(all_channels)
+    assert LocalFile.objects.filter(available=False).count() == 0
+
+    # Run the downloader with requests blocked. No URLs are mocked, so any
+    # request would fail, and since the download manager retries tasks
+    # forever, the test would eventually time out if one were attempted.
+    with requests_mock.Mocker():
+        manager = collectionviews.CollectionDownloadManager()
+        run_download_manager(manager, manifest, facility_user)
+        wait_for_background_tasks()
+
+    # Check that no additional channels or files have been downloaded.
+    assert ChannelMetadata.objects.count() == num_initial_channels
+    assert (
+        LocalFile.objects.filter(available=True).count() == num_initial_files
+    )
diff --git a/kolibri_explore_plugin/test/test_jobs.py b/kolibri_explore_plugin/test/test_jobs.py
index c84426aa..3a2d7c1c 100644
--- a/kolibri_explore_plugin/test/test_jobs.py
+++ b/kolibri_explore_plugin/test/test_jobs.py
@@ -1,6 +1,5 @@
 import pytest
 from django.core.management import call_command
-from kolibri.core.content.models import ChannelMetadata
 from kolibri.core.tasks.job import State
 
 from .utils import wait_for_background_tasks
@@ -70,7 +69,7 @@ def test_get_remotecontentimport_task():
     ]
 
     # If the channel doesn't exist an exception will be raised.
-    with pytest.raises(ChannelMetadata.DoesNotExist):
+    with pytest.raises(ValueError, match=r"does not exist"):
         jobs.get_remotecontentimport_task(channel_id)
 
     # Import the channel and try again with no nodes specified.
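Note on the new logic above (illustrative sketch, not part of the patch): the
skip rule added to get_channelimport_tasks() and get_extra_channelimport_tasks()
reduces to a single comparison between the locally installed channel version
and the latest version listed in the manifest. A minimal standalone Python
sketch, with should_skip_import as a hypothetical stand-in for the inline check:

    from typing import Optional

    def should_skip_import(installed: Optional[int], manifest_version: int) -> bool:
        """Skip the remote import when the channel is already present at the
        manifest's version or newer; import it when missing or outdated."""
        return installed is not None and installed >= manifest_version

    # Mirrors `if metadata and metadata.version >= channel_version: continue`.
    assert should_skip_import(None, 3) is False  # channel not installed yet
    assert should_skip_import(2, 3) is False     # installed but outdated
    assert should_skip_import(3, 3) is True      # already at the manifest version
    assert should_skip_import(4, 3) is True      # newer than the manifest

For the new test, a requests_mock.Mocker() with no registered URIs raises
requests_mock.exceptions.NoMockAddress for any HTTP request, so the preload
test only passes if the download manager issues no network requests at all.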