Merge size checks onto common isolation provider

An overdue dead code removal was finally done, combined with the merging of part of the containers and Qubes isolation provider regarding size (and width/height) checks.
freedomofpress · Nov 1, 2023 · 5174482 · 5174482
1 parent ba53c06
commit 5174482
Show file tree

Hide file tree

Showing 7 changed files with 64 additions and 93 deletions.
diff --git a/dangerzone/conversion/errors.py b/dangerzone/conversion/errors.py
@@ -118,6 +118,11 @@ class InterruptedConversion(ConversionException):
     )
 
 
+class PixelFilesMismatch(ConversionException):
+    error_code = ERROR_SHIFT + 70
+    error_message = "The number of pages received is different than expected"
+
+
 class UnexpectedConversionError(PDFtoPPMException):
     error_code = ERROR_SHIFT + 100
     error_message = "Some unexpected error occurred while converting the document"

diff --git a/dangerzone/isolation_provider/base.py b/dangerzone/isolation_provider/base.py
@@ -110,83 +110,20 @@ def convert_pixels_to_png(
         """
         Reconstruct PPM files and save as PNG to save space
         """
+        if not (1 <= width <= errors.MAX_PAGE_WIDTH):
+            raise errors.MaxPageWidthException()
+        if not (1 <= height <= errors.MAX_PAGE_HEIGHT):
+            raise errors.MaxPageHeightException()
+
         ppm_header = f"P6\n{width} {height}\n255\n".encode()
         ppm_data = io.BytesIO(ppm_header + rgb_data)
         png_path = Path(tempdir) / f"page-{page}.png"
 
+        # Verify the exact data was received
+        if len(rgb_data) != width * height * 3:
+            raise errors.InterruptedConversion()
+
         try:
             Image.open(ppm_data).save(png_path, "PNG")
         except UnidentifiedImageError as e:
             raise errors.PPMtoPNGError() from e
-
-
-# From global_common:
-
-# def validate_convert_to_pixel_output(self, common, output):
-#     """
-#     Take the output from the convert to pixels tasks and validate it. Returns
-#     a tuple like: (success (boolean), error_message (str))
-#     """
-#     max_image_width = 10000
-#     max_image_height = 10000
-
-#     # Did we hit an error?
-#     for line in output.split("\n"):
-#         if (
-#             "failed:" in line
-#             or "The document format is not supported" in line
-#             or "Error" in line
-#         ):
-#             return False, output
-
-#     # How many pages was that?
-#     num_pages = None
-#     for line in output.split("\n"):
-#         if line.startswith("Document has "):
-#             num_pages = line.split(" ")[2]
-#             break
-#     if not num_pages or not num_pages.isdigit() or int(num_pages) <= 0:
-#         return False, "Invalid number of pages returned"
-#     num_pages = int(num_pages)
-
-#     # Make sure we have the files we expect
-#     expected_filenames = []
-#     for i in range(1, num_pages + 1):
-#         expected_filenames += [
-#             f"page-{i}.rgb",
-#             f"page-{i}.width",
-#             f"page-{i}.height",
-#         ]
-#     expected_filenames.sort()
-#     actual_filenames = os.listdir(common.pixel_dir.name)
-#     actual_filenames.sort()
-
-#     if expected_filenames != actual_filenames:
-#         return (
-#             False,
-#             f"We expected these files:\n{expected_filenames}\n\nBut we got these files:\n{actual_filenames}",
-#         )
-
-#     # Make sure the files are the correct sizes
-#     for i in range(1, num_pages + 1):
-#         with open(f"{common.pixel_dir.name}/page-{i}.width") as f:
-#             w_str = f.read().strip()
-#         with open(f"{common.pixel_dir.name}/page-{i}.height") as f:
-#             h_str = f.read().strip()
-#         w = int(w_str)
-#         h = int(h_str)
-#         if (
-#             not w_str.isdigit()
-#             or not h_str.isdigit()
-#             or w <= 0
-#             or w > max_image_width
-#             or h <= 0
-#             or h > max_image_height
-#         ):
-#             return False, f"Page {i} has invalid geometry"
-
-#         # Make sure the RGB file is the correct size
-#         if os.path.getsize(f"{common.pixel_dir.name}/page-{i}.rgb") != w * h * 3:
-#             return False, f"Page {i} has an invalid RGB file size"
-
-#     return True, True
diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py
@@ -306,6 +306,8 @@ def _convert_with_tmpdirs(
             )
 
         num_pages = len(glob.glob(f"{pixel_dir}/page-*.rgb"))
+        self.verify_received_pixel_files(pixel_dir, num_pages)
+
         for page in range(1, num_pages + 1):
             filename_base = f"{pixel_dir}/page-{page}"
             rgb_filename = f"{filename_base}.rgb"
@@ -379,6 +381,25 @@ def _convert_with_tmpdirs(
 
         return success
 
+    def verify_received_pixel_files(
+        self, pixel_dir: pathlib.Path, num_pages: int
+    ) -> None:
+        """Make sure we have the files we expect"""
+
+        expected_filenames = ["captured_output.txt"]
+        for i in range(1, num_pages + 1):
+            expected_filenames += [
+                f"page-{i}.rgb",
+                f"page-{i}.width",
+                f"page-{i}.height",
+            ]
+        expected_filenames.sort()
+        actual_filenames = os.listdir(pixel_dir)
+        actual_filenames.sort()
+
+        if expected_filenames != actual_filenames:
+            raise errors.PixelFilesMismatch()
+
     def get_max_parallel_conversions(self) -> int:
         # FIXME hardcoded 1 until timeouts are more limited and better handled
         # https://github.com/freedomofpress/dangerzone/issues/257

diff --git a/dangerzone/isolation_provider/qubes.py b/dangerzone/isolation_provider/qubes.py
@@ -109,10 +109,6 @@ def __convert(
 
                 width = read_int(self.proc.stdout, timeout=sw.remaining)
                 height = read_int(self.proc.stdout, timeout=sw.remaining)
-                if not (1 <= width <= errors.MAX_PAGE_WIDTH):
-                    raise errors.MaxPageWidthException()
-                if not (1 <= height <= errors.MAX_PAGE_HEIGHT):
-                    raise errors.MaxPageHeightException()
 
                 num_pixels = width * height * 3  # three color channels
                 untrusted_pixels = read_bytes(

diff --git a/tests/isolation_provider/base.py b/tests/isolation_provider/base.py
@@ -9,7 +9,13 @@
 from dangerzone.isolation_provider import base
 from dangerzone.isolation_provider.qubes import running_on_qubes
 
-from .. import pdf_11k_pages, sanitized_text, uncommon_text
+from .. import (
+    pdf_11k_pages,
+    sample_bad_height,
+    sample_bad_width,
+    sanitized_text,
+    uncommon_text,
+)
 
 
 @pytest.mark.skipif(
@@ -68,3 +74,18 @@ def test_max_pages_received(
         with pytest.raises(errors.MaxPagesException):
             success = provider._convert(doc, ocr_lang=None)
             assert not success
+
+    def test_max_dimensions(
+        self,
+        sample_bad_width: str,
+        sample_bad_height: str,
+        provider: base.IsolationProvider,
+        mocker: MockerFixture,
+    ) -> None:
+        provider.progress_callback = mocker.MagicMock()
+        with pytest.raises(errors.MaxPageWidthException):
+            success = provider._convert(Document(sample_bad_width), ocr_lang=None)
+            assert not success
+        with pytest.raises(errors.MaxPageHeightException):
+            success = provider._convert(Document(sample_bad_height), ocr_lang=None)
+            assert not success
diff --git a/tests/isolation_provider/test_container.py b/tests/isolation_provider/test_container.py
@@ -9,7 +9,13 @@
 from dangerzone.isolation_provider.container import Container
 
 # XXX Fixtures used in abstract Test class need to be imported regardless
-from .. import pdf_11k_pages, sanitized_text, uncommon_text
+from .. import (
+    pdf_11k_pages,
+    sample_bad_height,
+    sample_bad_width,
+    sanitized_text,
+    uncommon_text,
+)
 from .base import IsolationProviderTest
 
 

diff --git a/tests/isolation_provider/test_qubes.py b/tests/isolation_provider/test_qubes.py
@@ -45,21 +45,6 @@ def test_max_pages_client_side_enforcement(
             success = provider._convert(doc, ocr_lang=None)
             assert not success
 
-    def test_max_dimensions(
-        self,
-        sample_bad_width: str,
-        sample_bad_height: str,
-        provider: Qubes,
-        mocker: MockerFixture,
-    ) -> None:
-        provider.progress_callback = mocker.MagicMock()
-        with pytest.raises(errors.MaxPageWidthException):
-            success = provider._convert(Document(sample_bad_width), ocr_lang=None)
-            assert not success
-        with pytest.raises(errors.MaxPageHeightException):
-            success = provider._convert(Document(sample_bad_height), ocr_lang=None)
-            assert not success
-
     def test_out_of_ram(
         self,
         provider: Qubes,