Skip to content

Commit

Permalink
Merge size checks onto common isolation provider
Browse files Browse the repository at this point in the history
An overdue dead code removal was finally done, combined with the merging
of part of the containers and Qubes isolation provider regarding size
(and width/height) checks.
  • Loading branch information
deeplow committed Nov 1, 2023
1 parent ba53c06 commit 5174482
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 93 deletions.
5 changes: 5 additions & 0 deletions dangerzone/conversion/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@ class InterruptedConversion(ConversionException):
)


class PixelFilesMismatch(ConversionException):
error_code = ERROR_SHIFT + 70
error_message = "The number of pages received is different than expected"


class UnexpectedConversionError(PDFtoPPMException):
error_code = ERROR_SHIFT + 100
error_message = "Some unexpected error occurred while converting the document"
Expand Down
81 changes: 9 additions & 72 deletions dangerzone/isolation_provider/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,83 +110,20 @@ def convert_pixels_to_png(
"""
Reconstruct PPM files and save as PNG to save space
"""
if not (1 <= width <= errors.MAX_PAGE_WIDTH):
raise errors.MaxPageWidthException()
if not (1 <= height <= errors.MAX_PAGE_HEIGHT):
raise errors.MaxPageHeightException()

ppm_header = f"P6\n{width} {height}\n255\n".encode()
ppm_data = io.BytesIO(ppm_header + rgb_data)
png_path = Path(tempdir) / f"page-{page}.png"

# Verify the exact data was received
if len(rgb_data) != width * height * 3:
raise errors.InterruptedConversion()

try:
Image.open(ppm_data).save(png_path, "PNG")
except UnidentifiedImageError as e:
raise errors.PPMtoPNGError() from e


# From global_common:

# def validate_convert_to_pixel_output(self, common, output):
# """
# Take the output from the convert to pixels tasks and validate it. Returns
# a tuple like: (success (boolean), error_message (str))
# """
# max_image_width = 10000
# max_image_height = 10000

# # Did we hit an error?
# for line in output.split("\n"):
# if (
# "failed:" in line
# or "The document format is not supported" in line
# or "Error" in line
# ):
# return False, output

# # How many pages was that?
# num_pages = None
# for line in output.split("\n"):
# if line.startswith("Document has "):
# num_pages = line.split(" ")[2]
# break
# if not num_pages or not num_pages.isdigit() or int(num_pages) <= 0:
# return False, "Invalid number of pages returned"
# num_pages = int(num_pages)

# # Make sure we have the files we expect
# expected_filenames = []
# for i in range(1, num_pages + 1):
# expected_filenames += [
# f"page-{i}.rgb",
# f"page-{i}.width",
# f"page-{i}.height",
# ]
# expected_filenames.sort()
# actual_filenames = os.listdir(common.pixel_dir.name)
# actual_filenames.sort()

# if expected_filenames != actual_filenames:
# return (
# False,
# f"We expected these files:\n{expected_filenames}\n\nBut we got these files:\n{actual_filenames}",
# )

# # Make sure the files are the correct sizes
# for i in range(1, num_pages + 1):
# with open(f"{common.pixel_dir.name}/page-{i}.width") as f:
# w_str = f.read().strip()
# with open(f"{common.pixel_dir.name}/page-{i}.height") as f:
# h_str = f.read().strip()
# w = int(w_str)
# h = int(h_str)
# if (
# not w_str.isdigit()
# or not h_str.isdigit()
# or w <= 0
# or w > max_image_width
# or h <= 0
# or h > max_image_height
# ):
# return False, f"Page {i} has invalid geometry"

# # Make sure the RGB file is the correct size
# if os.path.getsize(f"{common.pixel_dir.name}/page-{i}.rgb") != w * h * 3:
# return False, f"Page {i} has an invalid RGB file size"

# return True, True
21 changes: 21 additions & 0 deletions dangerzone/isolation_provider/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,8 @@ def _convert_with_tmpdirs(
)

num_pages = len(glob.glob(f"{pixel_dir}/page-*.rgb"))
self.verify_received_pixel_files(pixel_dir, num_pages)

for page in range(1, num_pages + 1):
filename_base = f"{pixel_dir}/page-{page}"
rgb_filename = f"{filename_base}.rgb"
Expand Down Expand Up @@ -379,6 +381,25 @@ def _convert_with_tmpdirs(

return success

def verify_received_pixel_files(
self, pixel_dir: pathlib.Path, num_pages: int
) -> None:
"""Make sure we have the files we expect"""

expected_filenames = ["captured_output.txt"]
for i in range(1, num_pages + 1):
expected_filenames += [
f"page-{i}.rgb",
f"page-{i}.width",
f"page-{i}.height",
]
expected_filenames.sort()
actual_filenames = os.listdir(pixel_dir)
actual_filenames.sort()

if expected_filenames != actual_filenames:
raise errors.PixelFilesMismatch()

def get_max_parallel_conversions(self) -> int:
# FIXME hardcoded 1 until timeouts are more limited and better handled
# https://github.com/freedomofpress/dangerzone/issues/257
Expand Down
4 changes: 0 additions & 4 deletions dangerzone/isolation_provider/qubes.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,6 @@ def __convert(

width = read_int(self.proc.stdout, timeout=sw.remaining)
height = read_int(self.proc.stdout, timeout=sw.remaining)
if not (1 <= width <= errors.MAX_PAGE_WIDTH):
raise errors.MaxPageWidthException()
if not (1 <= height <= errors.MAX_PAGE_HEIGHT):
raise errors.MaxPageHeightException()

num_pixels = width * height * 3 # three color channels
untrusted_pixels = read_bytes(
Expand Down
23 changes: 22 additions & 1 deletion tests/isolation_provider/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@
from dangerzone.isolation_provider import base
from dangerzone.isolation_provider.qubes import running_on_qubes

from .. import pdf_11k_pages, sanitized_text, uncommon_text
from .. import (
pdf_11k_pages,
sample_bad_height,
sample_bad_width,
sanitized_text,
uncommon_text,
)


@pytest.mark.skipif(
Expand Down Expand Up @@ -68,3 +74,18 @@ def test_max_pages_received(
with pytest.raises(errors.MaxPagesException):
success = provider._convert(doc, ocr_lang=None)
assert not success

def test_max_dimensions(
self,
sample_bad_width: str,
sample_bad_height: str,
provider: base.IsolationProvider,
mocker: MockerFixture,
) -> None:
provider.progress_callback = mocker.MagicMock()
with pytest.raises(errors.MaxPageWidthException):
success = provider._convert(Document(sample_bad_width), ocr_lang=None)
assert not success
with pytest.raises(errors.MaxPageHeightException):
success = provider._convert(Document(sample_bad_height), ocr_lang=None)
assert not success
8 changes: 7 additions & 1 deletion tests/isolation_provider/test_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@
from dangerzone.isolation_provider.container import Container

# XXX Fixtures used in abstract Test class need to be imported regardless
from .. import pdf_11k_pages, sanitized_text, uncommon_text
from .. import (
pdf_11k_pages,
sample_bad_height,
sample_bad_width,
sanitized_text,
uncommon_text,
)
from .base import IsolationProviderTest


Expand Down
15 changes: 0 additions & 15 deletions tests/isolation_provider/test_qubes.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,21 +45,6 @@ def test_max_pages_client_side_enforcement(
success = provider._convert(doc, ocr_lang=None)
assert not success

def test_max_dimensions(
self,
sample_bad_width: str,
sample_bad_height: str,
provider: Qubes,
mocker: MockerFixture,
) -> None:
provider.progress_callback = mocker.MagicMock()
with pytest.raises(errors.MaxPageWidthException):
success = provider._convert(Document(sample_bad_width), ocr_lang=None)
assert not success
with pytest.raises(errors.MaxPageHeightException):
success = provider._convert(Document(sample_bad_height), ocr_lang=None)
assert not success

def test_out_of_ram(
self,
provider: Qubes,
Expand Down

0 comments on commit 5174482

Please sign in to comment.