From a8d7024d8a5cb68975a0dc4afee9097421c8bb99 Mon Sep 17 00:00:00 2001 From: Dan Nicholson Date: Mon, 13 Nov 2023 17:01:59 -0700 Subject: [PATCH 1/7] Ignore generated _version.py in flake8 configuration This is already ignored by pre-commit because it's not under version control, but if you run bare `flake8` it will check it. --- .flake8 | 1 + 1 file changed, 1 insertion(+) diff --git a/.flake8 b/.flake8 index ead275853..3d62ff6c3 100644 --- a/.flake8 +++ b/.flake8 @@ -1,5 +1,6 @@ [flake8] extend-exclude = + kolibri_explore_plugin/_version.py, build/, static/, dist/, From a98c58717a88425a738b8a9685dff4869dcf7ab2 Mon Sep 17 00:00:00 2001 From: Dan Nicholson Date: Tue, 14 Nov 2023 09:48:32 -0700 Subject: [PATCH 2/7] tests: Lower content directory logging to DEBUG These were helpful when I was developing that code, but now they just add a lot of noise to the test output. --- kolibri_explore_plugin/test/utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kolibri_explore_plugin/test/utils.py b/kolibri_explore_plugin/test/utils.py index db2500506..dfdbf904c 100644 --- a/kolibri_explore_plugin/test/utils.py +++ b/kolibri_explore_plugin/test/utils.py @@ -54,7 +54,7 @@ def create_contentdir(content_path, channels_path=CHANNELSDIR): storage_path.mkdir(parents=True, exist_ok=True) for json_path in iglob(f"{channels_path}/*.json"): - logger.info(f"Loading channel JSON {json_path}") + logger.debug(f"Loading channel JSON {json_path}") with open(json_path, "r") as f: data = json.load(f) @@ -67,16 +67,16 @@ def create_contentdir(content_path, channels_path=CHANNELSDIR): channel_id = channels[0]["id"] db_path = databases_path / f"{channel_id}.sqlite3" if db_path.exists(): - logger.info(f"Removing existing channel database {db_path}") + logger.debug(f"Removing existing channel database {db_path}") db_path.unlink() - logger.info(f"Creating channel database {db_path}") + logger.debug(f"Creating channel database {db_path}") bridge = Bridge(db_path, schema_version=CURRENT_SCHEMA_VERSION) bridge.Base.metadata.bind = bridge.engine bridge.Base.metadata.create_all() # Create the content files from the localfile _content entries. - logger.info(f"Creating channel {channel_id} content files") + logger.debug(f"Creating channel {channel_id} content files") for localfile in data["content_localfile"]: id = localfile["id"] size = localfile["file_size"] @@ -101,7 +101,7 @@ def create_contentdir(content_path, channels_path=CHANNELSDIR): localfile_dir.mkdir(parents=True, exist_ok=True) localfile_path = localfile_dir / f"{id}.{ext}" if localfile_path.exists(): - logger.info(f"Validating content file {localfile_path}") + logger.debug(f"Validating content file {localfile_path}") localfile_size = os.path.getsize(localfile_path) if localfile_size != size: raise ValueError( From c942c45b94a146a97ad868d757764dd60c0afde6 Mon Sep 17 00:00:00 2001 From: Dan Nicholson Date: Tue, 14 Nov 2023 09:31:56 -0700 Subject: [PATCH 3/7] tests: Also copy channel JSON files into content directory In order for `ContentServer` to support the `/api/public/v1/channels/lookup/` endpoint, it needs to be able to introspect the channel database. Ideally this would open the actual sqlite database and use Kolibri's routines, but that would be complicated. Instead, copy the input JSON files into the content directory so the server can get the channel data from them without using the databases. 
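As a sketch of how a consumer can then read the copied data
(illustrative only, not part of this patch: load_channel_data is a
hypothetical helper, and it assumes the databases/<channel_id>.json
layout that create_contentdir produces under the content directory):

    import json
    from pathlib import Path

    def load_channel_data(content_path, channel_id):
        # Read the copied channel JSON instead of opening the sqlite
        # database. Returns None when the channel is unknown.
        json_path = Path(content_path) / "databases" / f"{channel_id}.json"
        try:
            with open(json_path) as f:
                return json.load(f)
        except FileNotFoundError:
            return None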
--- kolibri_explore_plugin/test/utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kolibri_explore_plugin/test/utils.py b/kolibri_explore_plugin/test/utils.py index dfdbf904c..c33fd76eb 100644 --- a/kolibri_explore_plugin/test/utils.py +++ b/kolibri_explore_plugin/test/utils.py @@ -8,6 +8,7 @@ import multiprocessing import os import queue +import shutil import threading import time from base64 import b64decode @@ -65,6 +66,13 @@ def create_contentdir(content_path, channels_path=CHANNELSDIR): ) channel_id = channels[0]["id"] + + # For convenience, copy the input JSON file so the data can be + # introspected without loading a sqlite database. + db_json_path = databases_path / f"{channel_id}.json" + logger.debug(f"Creating channel JSON data {db_json_path}") + shutil.copyfile(json_path, db_json_path) + db_path = databases_path / f"{channel_id}.sqlite3" if db_path.exists(): logger.debug(f"Removing existing channel database {db_path}") From 0fa2acfd220e3ec6ac517a78cf5bc6ed54ce3e79 Mon Sep 17 00:00:00 2001 From: Dan Nicholson Date: Mon, 13 Nov 2023 17:05:28 -0700 Subject: [PATCH 4/7] tests: Run ContentServer in same process When the `ContentServer` is run in a separate process with `multiprocessing`, none of the log messages are recorded. Instead, use a separate thread in the current process. All that needs to happen: * `serve_forever()` is run in a separate thread * `shutdown()` is called from the main thread * `server_close()` is called to close the socket Now that the log messages are visible, it's clear that the thread name in the handler log message is redundant since our pytest default format includes the thread name. --- kolibri_explore_plugin/test/utils.py | 61 +++++++++++++--------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/kolibri_explore_plugin/test/utils.py b/kolibri_explore_plugin/test/utils.py index c33fd76eb..c7fb651b4 100644 --- a/kolibri_explore_plugin/test/utils.py +++ b/kolibri_explore_plugin/test/utils.py @@ -5,9 +5,7 @@ import functools import json import logging -import multiprocessing import os -import queue import shutil import threading import time @@ -178,8 +176,7 @@ class LoggingHTTPRequestHandler(SimpleHTTPRequestHandler): def log_message(self, format, *args): logger.debug( - "%s: %s - - [%s] %s", - threading.current_thread().name, + "%s - - [%s] %s", self.address_string(), self.log_date_time_string(), format % args, @@ -191,7 +188,8 @@ class ContentServer: def __init__(self, path): self.path = Path(path) - self.proc = None + self.server = None + self.thread = None self.address = None self.url = None @@ -205,43 +203,40 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): self.stop() - def _run_server(self, path, queue): - handler_class = functools.partial( - LoggingHTTPRequestHandler, - directory=path, - ) - server = ThreadingHTTPServer(("127.0.0.1", 0), handler_class) - queue.put(server.server_address) - server.serve_forever() + def _run_server(self): + self.server.serve_forever() def start(self): """Start the HTTP server - A separate process is used so that the HTTP server can block. + A separate thread is used so that the HTTP server can block. 
""" - addr_queue = multiprocessing.Queue() - self.proc = multiprocessing.Process( - target=self._run_server, args=(self.path, addr_queue) + handler_class = functools.partial( + LoggingHTTPRequestHandler, + directory=self.path, ) - self.proc.start() - if not self.proc.is_alive(): - raise ExploreTestError(f"HTTP process {self.proc.pid} exited") - try: - self.address = addr_queue.get(True, 5) - except queue.Empty: - raise ExploreTestError( - "HTTP process did not write address to queue" - ) from None - + self.server = ThreadingHTTPServer(("127.0.0.1", 0), handler_class) + self.address = self.server.server_address self.url = f"http://{self.address[0]}:{self.address[1]}" + + self.thread = threading.Thread(target=self._run_server, daemon=True) + self.thread.start() + if not self.thread.is_alive(): + raise ExploreTestError(f"HTTP thread {self.thread.name} exited") logger.debug( - f"Serving {self.path} on {self.url} from process {self.proc.pid}" + f"Serving {self.path} on {self.url} from thread {self.thread.name}" ) def stop(self): """Stop the HTTP server""" - if self.proc is not None: - if self.proc.is_alive(): - logger.debug(f"Stopping HTTP server process {self.proc.pid}") - self.proc.terminate() - self.proc = None + if self.server is not None: + if self.thread is not None: + if self.thread.is_alive(): + logger.debug( + f"Stopping HTTP server thread {self.thread.name}" + ) + self.server.shutdown() + self.thread = None + + self.server.server_close() + self.server = None From 56d1191e2e26469bc8750070cc1d5032c4b7dab4 Mon Sep 17 00:00:00 2001 From: Dan Nicholson Date: Tue, 14 Nov 2023 10:42:11 -0700 Subject: [PATCH 5/7] Download: Skip unneeded content import tasks Unfortunately, even if Kolibri already has all the content nodes, it will still probe the remote server for channel metadata. Since that fails when the device is offline, skip creating the tasks if it appears all the content nodes are available. This uses the same `get_import_export_data` helper that Kolibri uses when determining nodes to download. That's an expensive query, but it appears that's the only way to reliably determine if a download is needed or not. Fixes: #890 --- kolibri_explore_plugin/collectionviews.py | 45 ++++++++++++++++--- .../test/test_collectionviews.py | 13 ++++++ 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/kolibri_explore_plugin/collectionviews.py b/kolibri_explore_plugin/collectionviews.py index dc9369243..c8ac6e4ad 100644 --- a/kolibri_explore_plugin/collectionviews.py +++ b/kolibri_explore_plugin/collectionviews.py @@ -12,6 +12,9 @@ from kolibri.core.content.utils.content_manifest import ( ContentManifestParseError, ) +from kolibri.core.content.utils.import_export_content import ( + get_import_export_data, +) from kolibri.core.tasks.job import State as JobState from kolibri.core.tasks.main import job_storage from kolibri.utils import conf @@ -189,6 +192,20 @@ def get_contentimport_tasks(self): node_ids = list( self._get_node_ids_for_channel(channel_metadata, channel_id) ) + + # Check if the desired nodes are already available. + num_resources, _, _ = get_import_export_data( + channel_id, + node_ids=node_ids, + available=False, + ) + if num_resources == 0: + logger.debug( + f"Skipping content import task for {channel_id} " + "since all resources already present" + ) + continue + tasks.append( get_remotecontentimport_task( channel_id, channel_metadata.name, node_ids @@ -219,12 +236,30 @@ def get_contentthumbnail_tasks(self): For all the channels in this content manifest. 
""" - return [ - get_remotecontentimport_task( - channel_id, node_ids=[], all_thumbnails=True + tasks = [] + + for channel_id in self.get_channel_ids(): + # Check if the desired thumbnail nodes are already available. + num_resources, _, _ = get_import_export_data( + channel_id, + node_ids=[], + available=False, + all_thumbnails=True, ) - for channel_id in self.get_channel_ids() - ] + if num_resources == 0: + logger.debug( + f"Skipping content thumbnail task for {channel_id} " + "since all resources already present" + ) + continue + + tasks.append( + get_remotecontentimport_task( + channel_id, node_ids=[], all_thumbnails=True + ) + ) + + return tasks def _get_node_ids_for_channel(self, channel_metadata, channel_id): """Get node IDs regardless of the version diff --git a/kolibri_explore_plugin/test/test_collectionviews.py b/kolibri_explore_plugin/test/test_collectionviews.py index 5931c6581..cac2ebb77 100644 --- a/kolibri_explore_plugin/test/test_collectionviews.py +++ b/kolibri_explore_plugin/test/test_collectionviews.py @@ -9,12 +9,14 @@ from kolibri.core.content.models import ChannelMetadata from kolibri.core.content.models import ContentNode from kolibri.core.content.models import LocalFile +from kolibri.core.tasks import main as tasks_main from rest_framework.test import APIClient from .utils import COLLECTIONSDIR from .utils import ExploreTestTimeoutError from .utils import wait_for_background_tasks from kolibri_explore_plugin import collectionviews +from kolibri_explore_plugin.jobs import TaskType @pytest.mark.django_db @@ -220,6 +222,11 @@ def test_download_manager_preload(facility_user, grade, name): assert num_initial_channels == len(all_channels) assert LocalFile.objects.filter(available=False).count() == 0 + # Clear all the jobs to check if any downloading jobs were created + # later. + job_storage = tasks_main.job_storage + job_storage.clear(force=True) + # Run the downloader with requests blocked. Since no URLs are mocked, all # requests will fail. Since the download manager retries tasks forever, it # will eventually time out on any request. @@ -233,3 +240,9 @@ def test_download_manager_preload(facility_user, grade, name): assert ( LocalFile.objects.filter(available=True).count() == num_initial_files ) + + # Check that no channel or content import jobs were created. + channel_jobs = job_storage.filter_jobs(func=TaskType.REMOTECHANNELIMPORT) + assert channel_jobs == [] + content_jobs = job_storage.filter_jobs(func=TaskType.REMOTECONTENTIMPORT) + assert content_jobs == [] From d1134949813c6562effbcc0bade44fbe70a79772 Mon Sep 17 00:00:00 2001 From: Dan Nicholson Date: Tue, 14 Nov 2023 11:36:35 -0700 Subject: [PATCH 6/7] Download: Import extra channels and thumbnails with single task In order to import an extra channel and its thumbnails, the storage hook would detect a completed channel import task and then create a thumbnail import task for that channel dynamically. However, now that channel import tasks are skipped if the channel is already available, no completed channel import task would arrive to trigger the thumbnail task creation. That's an unlikely scenario since the installation is expected to come with either no content or fully populated channels, but it can be handled better. Instead of using 2 tasks, use a single `remoteimport` task that combines `remotechannelimport` and `remotecontentimport`. The hook action is kept in case there are existing installations that hadn't completed the background tasks yet, but hopefully it can be removed someday. 
--- kolibri_explore_plugin/collectionviews.py | 32 +++++-- kolibri_explore_plugin/jobs.py | 42 ++++++++++ .../test/test_collectionviews.py | 2 + kolibri_explore_plugin/test/test_jobs.py | 83 +++++++++++++++++++ 4 files changed, 153 insertions(+), 6 deletions(-) diff --git a/kolibri_explore_plugin/collectionviews.py b/kolibri_explore_plugin/collectionviews.py index c8ac6e4ad..7863df051 100644 --- a/kolibri_explore_plugin/collectionviews.py +++ b/kolibri_explore_plugin/collectionviews.py @@ -29,6 +29,7 @@ from .jobs import get_channel_metadata from .jobs import get_remotechannelimport_task from .jobs import get_remotecontentimport_task +from .jobs import get_remoteimport_task from .models import BackgroundTask logger = logging.getLogger(__name__) @@ -166,18 +167,37 @@ def get_channelimport_tasks(self): def get_extra_channelimport_tasks(self): """Return a serializable object to create extra channelimport tasks - For all channels featured in Endless Key content manifests. + For all channels featured in Endless Key content manifests. In addition + to the channel metadata, all thumbnails are downloaded. """ tasks = [] for channel_id, channel_version in self.get_latest_extra_channels(): + # Check if the channel metadata and thumbnails are already + # available. metadata = get_channel_metadata(channel_id) if metadata and metadata.version >= channel_version: - logger.debug( - f"Skipping extra channel import task for {channel_id} " - "since already present" + # The channel metadata is available. Now check if the thumbnail + # nodes are already available. + num_resources, _, _ = get_import_export_data( + channel_id, + node_ids=[], + available=False, + all_thumbnails=True, ) - continue - tasks.append(get_remotechannelimport_task(channel_id)) + if num_resources == 0: + logger.debug( + f"Skipping extra channel import task for {channel_id} " + "since channel metadata and all resources already " + "present" + ) + continue + + tasks.append( + get_remoteimport_task( + channel_id, node_ids=[], all_thumbnails=True + ) + ) + return tasks def get_contentimport_tasks(self): diff --git a/kolibri_explore_plugin/jobs.py b/kolibri_explore_plugin/jobs.py index 71d27077e..955f115b9 100644 --- a/kolibri_explore_plugin/jobs.py +++ b/kolibri_explore_plugin/jobs.py @@ -27,6 +27,7 @@ class TaskType: APPLYEXTERNALTAGS = "kolibri_explore_plugin.tasks.applyexternaltags" REMOTECHANNELIMPORT = "kolibri.core.content.tasks.remotechannelimport" REMOTECONTENTIMPORT = "kolibri_explore_plugin.tasks.remotecontentimport" + REMOTEIMPORT = "kolibri.core.content.tasks.remoteimport" def get_channel_metadata(channel_id): @@ -89,6 +90,33 @@ def get_remotecontentimport_task( } +def get_remoteimport_task( + channel_id, + channel_name=None, + node_ids=None, + all_thumbnails=False, +): + if not channel_name: + # Try to get the channel name from an existing channel database, + # but this will fail on first import. 
+ channel_metadata = get_channel_metadata(channel_id) + if channel_metadata: + channel_name = channel_metadata.name + else: + channel_name = "unknown" + return { + "task": TaskType.REMOTEIMPORT, + "params": { + "channel_id": channel_id, + "channel_name": channel_name, + "node_ids": node_ids, + "exclude_node_ids": [], + "all_thumbnails": all_thumbnails, + "fail_on_error": True, + }, + } + + def get_content_task_user(): """Get a Kolibri user for content task usage""" return FacilityUser.objects.filter( @@ -178,9 +206,23 @@ def storage_update_hook(job, orm_job, state=None, **kwargs): elif state == State.COMPLETED: # If the completed task is a channel import, create the # associated thumbnail download task to be run later. + # + # FIXME: Previously the extra channels and their thumbnails were + # imported using 2 tasks. In order to keep the thumbnail task from + # being created before the channel was imported, the thumbnail task was + # created on the fly here. Now this is done with a single combined + # remoteimport task and this is no longer needed. However, it's kept + # for now in case there are existing installations that had started the + # background tasks but not completed them. Drop this at some point. + # + # https://github.com/endlessm/kolibri-explore-plugin/issues/890 if bg_task.func == TaskType.REMOTECHANNELIMPORT: bg_task_params = json.loads(bg_task.params) channel_id = bg_task_params["channel_id"] + logger.warning( + f"Creating thumbnail task for {channel_id} legacy background " + "channel import task" + ) thumbnail_task_data = get_remotecontentimport_task( channel_id, node_ids=[], all_thumbnails=True ) diff --git a/kolibri_explore_plugin/test/test_collectionviews.py b/kolibri_explore_plugin/test/test_collectionviews.py index cac2ebb77..538e84cbb 100644 --- a/kolibri_explore_plugin/test/test_collectionviews.py +++ b/kolibri_explore_plugin/test/test_collectionviews.py @@ -246,3 +246,5 @@ def test_download_manager_preload(facility_user, grade, name): assert channel_jobs == [] content_jobs = job_storage.filter_jobs(func=TaskType.REMOTECONTENTIMPORT) assert content_jobs == [] + import_jobs = job_storage.filter_jobs(func=TaskType.REMOTEIMPORT) + assert import_jobs == [] diff --git a/kolibri_explore_plugin/test/test_jobs.py b/kolibri_explore_plugin/test/test_jobs.py index 3a2d7c1c5..856f9ac08 100644 --- a/kolibri_explore_plugin/test/test_jobs.py +++ b/kolibri_explore_plugin/test/test_jobs.py @@ -132,6 +132,89 @@ def test_get_remotecontentimport_task(): } +@pytest.mark.usefixtures("channel_import_db", "content_server") +@pytest.mark.django_db +def test_get_remoteimport_task(): + channel_id = "b51baf46133045e3bce4d2d872a8f71d" + node_ids = [ + "5a24503255ce43d98ebcb25d2b60f024", + "91a1bfc0ede544979f861909b7862537", + ] + + # No nodes specified. + task = jobs.get_remoteimport_task(channel_id) + assert task == { + "task": jobs.TaskType.REMOTEIMPORT, + "params": { + "channel_id": channel_id, + "channel_name": "unknown", + "node_ids": None, + "exclude_node_ids": [], + "all_thumbnails": False, + "fail_on_error": True, + }, + } + + # Specify the nodes. + task = jobs.get_remoteimport_task(channel_id, node_ids=node_ids) + assert task == { + "task": jobs.TaskType.REMOTEIMPORT, + "params": { + "channel_id": channel_id, + "channel_name": "unknown", + "node_ids": node_ids, + "exclude_node_ids": [], + "all_thumbnails": False, + "fail_on_error": True, + }, + } + + # Override the channel name. 
+ task = jobs.get_remoteimport_task(channel_id, channel_name="foo") + assert task == { + "task": jobs.TaskType.REMOTEIMPORT, + "params": { + "channel_id": channel_id, + "channel_name": "foo", + "node_ids": None, + "exclude_node_ids": [], + "all_thumbnails": False, + "fail_on_error": True, + }, + } + + # Specify an empty node list and all_thumbnails. + task = jobs.get_remoteimport_task( + channel_id, node_ids=[], all_thumbnails=True + ) + assert task == { + "task": jobs.TaskType.REMOTEIMPORT, + "params": { + "channel_id": channel_id, + "channel_name": "unknown", + "node_ids": [], + "exclude_node_ids": [], + "all_thumbnails": True, + "fail_on_error": True, + }, + } + + # After importing the channel, the channel name will be known. + importchannel(channel_id) + task = jobs.get_remoteimport_task(channel_id) + assert task == { + "task": jobs.TaskType.REMOTEIMPORT, + "params": { + "channel_id": channel_id, + "channel_name": "testing", + "node_ids": None, + "exclude_node_ids": [], + "all_thumbnails": False, + "fail_on_error": True, + }, + } + + @pytest.mark.usefixtures( "channel_import_db", "content_server", "facility_user", "worker" ) From ffaa29cc77a1c621e3278c4be4ad22dc00f65fbf Mon Sep 17 00:00:00 2001 From: Dan Nicholson Date: Mon, 13 Nov 2023 17:12:14 -0700 Subject: [PATCH 7/7] tests: Don't stub Kolibri server probing Mocking these interfaces was hiding the fact that Kolibri was still making network requests when they weren't expected. Instead, have `ContentServer` handle them so tests that aren't supposed to make network requests fail. --- kolibri_explore_plugin/test/conftest.py | 15 --- kolibri_explore_plugin/test/utils.py | 145 +++++++++++++++++++++++- 2 files changed, 142 insertions(+), 18 deletions(-) diff --git a/kolibri_explore_plugin/test/conftest.py b/kolibri_explore_plugin/test/conftest.py index 7f49fb152..f93f0d0e5 100644 --- a/kolibri_explore_plugin/test/conftest.py +++ b/kolibri_explore_plugin/test/conftest.py @@ -77,9 +77,6 @@ def contentdir(serverdir): @pytest.fixture def content_server(serverdir, contentdir, monkeypatch): """HTTP content server using test data""" - from kolibri.core.discovery.utils.network.client import NetworkClient - from kolibri.core.content.utils import resource_import - with ContentServer(serverdir) as server: # Override the Kolibri content server URL. monkeypatch.setitem( @@ -88,18 +85,6 @@ def content_server(serverdir, contentdir, monkeypatch): server.url, ) - # Don't introspect the server for info. 
-        monkeypatch.setattr(
-            NetworkClient,
-            "build_for_address",
-            lambda addr: NetworkClient(addr),
-        )
-        monkeypatch.setattr(
-            resource_import,
-            "lookup_channel_listing_status",
-            lambda channel_id, baseurl: None,
-        )
-
         yield server
 
 
diff --git a/kolibri_explore_plugin/test/utils.py b/kolibri_explore_plugin/test/utils.py
index c7fb651b4..d7fb0eae8 100644
--- a/kolibri_explore_plugin/test/utils.py
+++ b/kolibri_explore_plugin/test/utils.py
@@ -6,15 +6,19 @@
 import json
 import logging
 import os
+import re
 import shutil
 import threading
 import time
 from base64 import b64decode
 from glob import iglob
 from hashlib import md5
+from http import HTTPStatus
 from http.server import SimpleHTTPRequestHandler
 from http.server import ThreadingHTTPServer
+from io import BytesIO
 from pathlib import Path
+from urllib.parse import urlparse
 
 from django.db import OperationalError
 from kolibri.core.tasks.job import State
@@ -171,8 +175,143 @@ def wait_for_background_tasks(timeout=30):
         time.sleep(0.5)
 
 
-class LoggingHTTPRequestHandler(SimpleHTTPRequestHandler):
-    """SimpleHTTPRequestHandler with logging"""
+class ContentHTTPRequestHandler(SimpleHTTPRequestHandler):
+    """HTTP request handler for Kolibri content server"""
+
+    # Kolibri tries to access the raw socket in some scenarios, and that's not
+    # possible with HTTP/1.0 since the socket is closed immediately after the
+    # response is sent.
+    protocol_version = "HTTP/1.1"
+
+    # A list of path regex and handler tuples for routing requests.
+    ROUTES = [
+        (re.compile(r"^/api/public/info/$"), "_send_device_info"),
+        (
+            re.compile(
+                r"^/api/public/v1/channels/lookup/(?P<channel_id>[^/]+)$"
+            ),
+            "_send_channel_lookup",
+        ),
+    ]
+
+    def send_head(self):
+        url_parts = urlparse(self.path)
+        for regex, handler in self.ROUTES:
+            match = regex.match(url_parts.path)
+            if not match:
+                continue
+            func = getattr(self, handler)
+            return func(match)
+
+        return super().send_head()
+
+    def _send_device_info(self, match):
+        """Send server device information
+
+        See kolibri.core.device.utils.get_device_info.
+        """
+        from kolibri import __version__ as kolibri_version
+
+        data = {
+            "application": "studio",
+            "kolibri_version": kolibri_version,
+            "instance_id": "952d412212d549eb9b73a86f426d8a49",
+            "device_name": "Test Studio",
+            "operating_system": None,
+        }
+        content = json.dumps(data).encode("utf-8")
+
+        self.send_response(HTTPStatus.OK)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(content)))
+        self.end_headers()
+        return BytesIO(content)
+
+    def _send_channel_lookup(self, match):
+        """Send channel information
+
+        See kolibri.core.public.api.get_public_channel_lookup,
+        kolibri.core.content.serializers.PublicChannelSerializer,
+        kolibri.core.content.base_models.ChannelMetadata and
+        kolibri.core.content.models.ChannelMetadata.
+ """ + channel_id = match.group("channel_id") + channel = self._get_channel_data(channel_id) + + if channel: + metadata = channel["content_channelmetadata"][0] + root_node = next( + node + for node in channel["content_contentnode"] + if node["id"] == metadata["root_id"] + ) + root_lang_id = root_node.get("lang_id") + if root_lang_id: + root_lang = next( + lang + for lang in channel["content_language"] + if lang["id"] == root_lang_id + ) + root_lang_code = root_lang["lang_code"] + else: + root_lang_code = None + + included_languages = [ + lang["id"] for lang in channel.get("content_language", []) + ] + total_resource_count = len( + [ + node + for node in channel["content_contentnode"] + if node["kind"] != "topic" + ] + ) + published_size = sum( + [f["file_size"] for f in channel["content_localfile"]] + ) + + data = [ + { + "id": metadata["id"], + "name": metadata["name"], + "language": root_lang_code, + "included_languages": included_languages, + "description": metadata.get("description", ""), + "tagline": metadata.get("tagline", None), + "total_resource_count": total_resource_count, + "version": metadata["version"], + "published_size": published_size, + "last_published": metadata.get("last_updated"), + "icon_encoding": metadata.get("thumbnail", ""), + "matching_tokens": [], + "public": True, + }, + ] + status = HTTPStatus.OK + else: + data = { + "id": "NOT_FOUND", + "metadata": {"view": ""}, + } + status = HTTPStatus.NOT_FOUND + + content = json.dumps(data).encode("utf-8") + self.send_response(status) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(content))) + self.end_headers() + return BytesIO(content) + + def _get_channel_data(self, channel_id): + json_path = os.path.join( + self.directory, + f"content/databases/{channel_id}.json", + ) + try: + with open(json_path) as f: + return json.load(f) + except FileNotFoundError: + return None def log_message(self, format, *args): logger.debug( @@ -212,7 +351,7 @@ def start(self): A separate thread is used so that the HTTP server can block. """ handler_class = functools.partial( - LoggingHTTPRequestHandler, + ContentHTTPRequestHandler, directory=self.path, ) self.server = ThreadingHTTPServer(("127.0.0.1", 0), handler_class)