feat: introduce landlock based sandboxing

Co-authored-by: Quentin Kaiser <[email protected]>
onekey-sec · Oct 10, 2024 · 681a54b · 681a54b
1 parent f9344c8
commit 681a54b
Show file tree

Hide file tree

Showing 7 changed files with 205 additions and 4 deletions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -26,7 +26,7 @@ lz4 = "^4.3.2"
 lief = "^0.15.1"
 cryptography = ">=41.0,<44.0"
 treelib = "^1.7.0"
-unblob-native = "^0.1.1"
+unblob-native = "^0.1.2"
 jefferson = "^0.4.5"
 rich = "^13.3.5"
 pyfatfs = "^1.0.5"

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -1,3 +1,4 @@
+import sys
 from pathlib import Path
 from typing import Iterable, List, Optional, Tuple, Type
 from unittest import mock
@@ -425,3 +426,27 @@ def test_clear_skip_magics(
     assert sorted(process_file_mock.call_args.args[0].skip_magic) == sorted(
         skip_magic
     ), fail_message
+
+
+@pytest.mark.skipif(sys.platform != "linux", reason="Sandboxing only works on Linux")
+def test_sandbox_escape(tmp_path: Path):
+    """A ``process_file`` that writes to an unrelated path makes the CLI fail."""
+    # File in tmp_path that is neither the input nor inside the extract dir.
+    unrelated_file = tmp_path / "unrelated"
+
+    def escape_attempt(*_args, **_kwargs):
+        # Stand-in for ``process_file``: ignores its arguments and writes
+        # to the unrelated file instead.
+        return unrelated_file.write_text("sandbox escape")
+
+    in_path = tmp_path / "input"
+    in_path.touch()
+    extract_dir = tmp_path / "extract-dir"
+    params = ["--extract-dir", str(extract_dir), str(in_path)]
+
+    process_file_mock = mock.MagicMock(side_effect=escape_attempt)
+    runner = CliRunner()
+    with mock.patch.object(unblob.cli, "process_file", process_file_mock):
+        result = runner.invoke(unblob.cli.cli, params)
+
+    # The write attempt must surface as a PermissionError and a failing exit.
+    assert result.exit_code != 0
+    assert isinstance(result.exception, PermissionError)
+    process_file_mock.assert_called_once()
diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py
@@ -0,0 +1,57 @@
+import sys
+from pathlib import Path
+
+import pytest
+
+from unblob.processing import ExtractionConfig
+from unblob.sandbox import Sandbox
+
+pytestmark = pytest.mark.skipif(
+    sys.platform != "linux", reason="Sandboxing only works on Linux"
+)
+
+
+@pytest.fixture
+def log_path(tmp_path):
+    """Return the ``unblob.log`` path inside ``tmp_path`` (file is not created)."""
+    return tmp_path.joinpath("unblob.log")
+
+
+@pytest.fixture
+def extraction_config(extraction_config, tmp_path):
+    """Point the inherited ``extraction_config`` at ``tmp_path/extract/root``."""
+    extraction_config.extract_root = tmp_path / "extract" / "root"
+    # Only the parent is created here (it has to exist); the extract root
+    # itself is left for the tests to create.
+    parent_dir = extraction_config.extract_root.parent
+    parent_dir.mkdir()
+    return extraction_config
+
+
+@pytest.fixture
+def sandbox(extraction_config: ExtractionConfig, log_path: Path):
+    """Return a ``Sandbox`` built from the test config, with no report file."""
+    return Sandbox(config=extraction_config, log_path=log_path, report_file=None)
+
+
+def test_necessary_resources_can_be_created_in_sandbox(
+    sandbox: Sandbox, extraction_config: ExtractionConfig, log_path: Path
+):
+    """Creating and writing under the extract root and to the log file works."""
+    extract_root = extraction_config.extract_root
+    directory_in_extract_root = extract_root / "path" / "to" / "dir"
+    file_in_extract_root = directory_in_extract_root / "file"
+
+    # The extract root first, then a nested directory below it.
+    for directory in (extract_root, directory_in_extract_root):
+        sandbox.run(directory.mkdir, parents=True)
+
+    sandbox.run(file_in_extract_root.touch)
+    sandbox.run(file_in_extract_root.write_text, "file content")
+
+    # The log file is created outside the sandbox before the sandboxed
+    # write (original note: the log-file is already opened).
+    log_path.touch()
+    sandbox.run(log_path.write_text, "log line")
+
+
+def test_access_outside_sandbox_is_not_possible(sandbox: Sandbox, tmp_path: Path):
+    """Filesystem writes outside the granted paths raise ``PermissionError``."""
+    # Directory creation somewhere else under tmp_path.
+    unrelated_dir = tmp_path.joinpath("unrelated", "path")
+    with pytest.raises(PermissionError):
+        sandbox.run(unrelated_dir.mkdir, parents=True)
+
+    # File creation somewhere else under tmp_path.
+    unrelated_file = tmp_path.joinpath("unrelated-file")
+    with pytest.raises(PermissionError):
+        sandbox.run(unrelated_file.touch)
diff --git a/unblob/cli.py b/unblob/cli.py
@@ -33,6 +33,7 @@
     ExtractionConfig,
     process_file,
 )
+from .sandbox import Sandbox
 from .ui import NullProgressReporter, RichConsoleProgressReporter
 
 logger = get_logger()
@@ -301,7 +302,8 @@ def cli(
     )
 
     logger.info("Start processing file", file=file)
-    process_results = process_file(config, file, report_file)
+    sandbox = Sandbox(config, log_path, report_file)
+    process_results = sandbox.run(process_file, config, file, report_file)
     if verbose == 0:
         if skip_extraction:
             print_scan_report(process_results)

diff --git a/unblob/processing.py b/unblob/processing.py
@@ -110,7 +110,6 @@ def get_extract_dir_for(self, path: Path) -> Path:
         return extract_dir.expanduser().resolve()
 
 
-@terminate_gracefully
 def process_file(
     config: ExtractionConfig, input_path: Path, report_file: Optional[Path] = None
 ) -> ProcessResult:

diff --git a/unblob/sandbox.py b/unblob/sandbox.py
@@ -0,0 +1,118 @@
+import ctypes
+import sys
+import threading
+from pathlib import Path
+from typing import Callable, Iterable, Optional, Type, TypeVar
+
+from structlog import get_logger
+from unblob_native.sandbox import (
+    AccessFS,
+    SandboxError,
+    restrict_access,
+)
+
+if sys.version_info >= (3, 10):
+    from typing import ParamSpec
+else:
+    from typing_extensions import ParamSpec
+
+from unblob.processing import ExtractionConfig
+
+logger = get_logger()
+
+P = ParamSpec("P")
+R = TypeVar("R")
+
+
+class Sandbox:
+    """Configures restricted file-systems to run functions in.
+
+    When calling ``run()``, a separate thread will be configured with
+    minimum required file-system permissions. All subprocesses spawned
+    from that thread will honor the restrictions.
+    """
+
+    def __init__(
+        self,
+        config: ExtractionConfig,
+        log_path: Path,
+        report_file: Optional[Path],
+        extra_restrictions: Iterable[AccessFS] = (),
+    ):
+        """Assemble the list of access rules that ``run()`` applies.
+
+        ``config.extract_root`` and ``log_path`` always get read-write rules;
+        ``report_file`` gets one only when it is given. ``extra_restrictions``
+        are added to the built-in rules.
+        """
+        self.restrictions = [
+            # Python, shared libraries, extractor binaries and so on
+            AccessFS.read("/"),
+            # Multiprocessing
+            AccessFS.read_write("/dev/shm"),  # noqa: S108
+            # Extracted contents
+            AccessFS.read_write(config.extract_root),
+            AccessFS.make_dir(config.extract_root.parent),
+            AccessFS.read_write(log_path),
+            *extra_restrictions,
+        ]
+
+        if report_file:
+            self.restrictions += [
+                AccessFS.read_write(report_file),
+                AccessFS.make_reg(report_file.parent),
+            ]
+
+    def run(self, callback: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
+        """Run callback with restricted filesystem access.
+
+        The callback is executed in a new thread that first tries to enter
+        the sandbox. The calling thread blocks until that thread finishes,
+        then returns the callback's result or re-raises whatever was raised
+        in the thread. A ``KeyboardInterrupt`` received while waiting is
+        forwarded to the worker thread.
+        """
+        exception = None
+        result = None
+
+        def _run_in_thread(callback, *args, **kwargs):
+            nonlocal exception, result
+
+            try:
+                # Entering the sandbox happens inside the ``try`` so that an
+                # unexpected failure (or an interrupt arriving at this point)
+                # is handed back to the caller instead of being lost with
+                # the thread, which would make ``run()`` return ``None``.
+                self._try_enter_sandbox()
+                result = callback(*args, **kwargs)
+            except BaseException as e:
+                exception = e
+
+        thread = threading.Thread(
+            target=_run_in_thread, args=(callback, *args), kwargs=kwargs
+        )
+        thread.start()
+
+        try:
+            thread.join()
+        except KeyboardInterrupt:
+            # Pass the interrupt on to the worker, then wait for it to stop.
+            raise_in_thread(thread, KeyboardInterrupt)
+            thread.join()
+
+        # Explicit ``None`` check: an exception instance may define its own
+        # truthiness and must still be re-raised.
+        if exception is not None:
+            raise exception  # pyright: ignore[reportGeneralTypeIssues]
+        return result  # pyright: ignore[reportReturnType]
+
+    def _try_enter_sandbox(self):
+        """Call ``restrict_access`` with ``self.restrictions``.
+
+        A ``SandboxError`` is only logged as a warning, so the caller
+        continues without sandboxing in that case.
+        """
+        try:
+            restrict_access(*self.restrictions)
+        except SandboxError:
+            logger.warning(
+                "Sandboxing FS access is unavailable on this system, skipping."
+            )
+
+
+def raise_in_thread(thread: threading.Thread, exctype: Type) -> None:
+    """Ask the interpreter to raise ``exctype`` asynchronously in ``thread``.
+
+    Raises ``RuntimeError`` when the thread has not been started, or when
+    ``PyThreadState_SetAsyncExc`` reports a result other than 0 or 1.
+    """
+    thread_id = thread.ident
+    if thread_id is None:
+        raise RuntimeError("Thread is not started")
+
+    set_async_exc = ctypes.pythonapi.PyThreadState_SetAsyncExc
+    modified = set_async_exc(ctypes.c_ulong(thread_id), ctypes.py_object(exctype))
+
+    # Exactly one thread state was modified: the request went through.
+    if modified == 1:
+        return
+
+    # Anything else: clear the pending request to restore interpreter state.
+    set_async_exc(ctypes.c_ulong(thread_id), None)
+
+    # A result of 0 is tolerated, the thread could have exited in the
+    # meantime; any other value means something went wrong.
+    if modified != 0:
+        raise RuntimeError("Could not raise exception in thread", thread_id)