From 32b30d403a8b9e961514d4efef6bd34a4e03b608 Mon Sep 17 00:00:00 2001 From: robinvandermolen Date: Thu, 19 Dec 2024 14:09:10 +0100 Subject: [PATCH 1/6] :test_tube: [#4795] Add test for msg file upload --- src/openforms/formio/tests/files/test.msg | Bin 0 -> 11264 bytes src/openforms/formio/tests/test_validators.py | 17 +++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 src/openforms/formio/tests/files/test.msg diff --git a/src/openforms/formio/tests/files/test.msg b/src/openforms/formio/tests/files/test.msg new file mode 100644 index 0000000000000000000000000000000000000000..b597796de62bb5b61b4584d863f83cb1cf44672c GIT binary patch literal 11264 zcmeHNO>7&-6`ti%f0XqX)pqR0Y6>Cd|&G%a($OuHp^yNiBL^?8k9P0fl`}FR|IVnZPLgFSezxwJM;1h7Sh@&4?Tt0 zt2FtqB2Fb^?l1{wmU_>zB|=+Z1!Y{>=@F)Ba=H)7!ix0zzaoNfnU+@r-)75fj-C#? z?D08EyrwA*QA|)-y8M-G`A3L<;P0bSq}wWM5Mcc)UR{{d+j!9nEkr|>Z4@Eb4vLsp ztZ__P7sYOh_fe!Rfb~&CT|cGh6ZxI+<&nVMT>tWXAT_8OUU^0n>p-3tXmmH$f4Q`) z0#_84<5yHQL>2nX zs`z`UEo>VEKm0*-AhFDT(Ft$z)6DQr?z{$aDK%>PIL`#T8#u$y6bBlyXGgbgd{AN(oU>#F?2 zo|pE&AHJ*o_4gmiyDGl)A7~i9{GU+NKgbh0rHYTWvWFsqKmREu`3L16d({sGx*z@| zaj&%h3Lgymq{=^ZzyA6IzS{m9lz-`4LIkzt4}8eK?*1cm8GE`a|L||??|;CD4_;UQ z1+J6Z2H{!^tt%t+3E5|KKaw;e37TjrY7NqM`N$P z^NZenKP|F9X+1OKPHAhW>DGOoZpJyC!6x~~jAhw0n_*z~3f+^V%w}U0O>uW_6K#2S z_Hpv2QK}MBImZ~~*_?BX4964j+Rsd^ zt9RDya;hc>b#$85CiPcNNf6A+w)ay1nq7bQdjC9?nT|hYIt{P?9x?#lrg-MrspU6g zFTSSJO^TH-T^>jd|0StUI>vWf#x-5LwGmkhy{doK)*o8y`3K8{&!236y6vsL!`S6mIUpVhX%r^x$&|54Zf(qO|$%fnY# zUS*t8%gfiomp9J}{+9;3680p<;C1Vp9H&~?s|f!1YWGi)cUAmjgbN=G!5?2rA?-G< z@}&D$274OzwJQFj)K-oE|Be6m2;svoK&ZukLTKNJivDfto+|%G3Adj7-Ftq_e_j0# zj1ON$Fz?GhMu7RRi?6|dIz&;Ge|3C(e^3{{DgLD#nn}U849)b@=(`ddX`_8K^WNRS zClf#fnac5uv9<&`1Wf*RHOAVv(K@@;gL)Zs_D?~}vM398Bk%r3*iUf4i-~$EFP-%aq5k$s^oTLGZL5#617dz$`9U7v6yta2??#xl zy!5envB8J1zq8|K$p4)w#XtPSe>RXx=rUN4Pik)~%qo g^ibbcFZB-Ryt}Tx5dGoY1x Date: Thu, 19 Dec 2024 17:07:36 +0100 Subject: [PATCH 2/6] :bug: [#4795] Invert validation for .msg files The SDK cannot reliably determine which content type belongs to a .msg file, most notably on Linux and MacOS because the extension is not in the mime type database. This manifests as a file being uploaded with empty content-type. To allow these files to go through, the serializer must allow empty values for the 'type' field which contains the detected content type, and the backend must perform additional processing to determine the file type. We can do this by falling back to the generic case of 'binary file' (application/octet-stream) content type, and let libmagic figure out which extensions belong to the magic bytes, i.e. we look at the magic bytes to figure out what kind of file was provided, and we check the provided file extensions against the list of valid extensions for the detected file type. --- src/openforms/formio/api/validators.py | 41 ++++++++++++++-------- src/openforms/formio/components/vanilla.py | 9 +---- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/src/openforms/formio/api/validators.py b/src/openforms/formio/api/validators.py index 34dcd5aa13..af072a4d13 100644 --- a/src/openforms/formio/api/validators.py +++ b/src/openforms/formio/api/validators.py @@ -1,4 +1,5 @@ import logging +from pathlib import Path from typing import Iterable from django.core.files.uploadedfile import UploadedFile @@ -56,29 +57,40 @@ def __init__(self, allowed_mime_types: Iterable[str] | None = None): def __call__(self, value: UploadedFile) -> None: head = value.read(2048) - ext = value.name.split(".")[-1] - mime_type = magic.from_buffer(head, mime=True) + ext = Path(value.name or "").suffix[1:] + detected_mime_type = magic.from_buffer(head, mime=True) + provided_mime_type = value.content_type or "application/octet-stream" # gh #2520 # application/x-ole-storage on Arch with shared-mime-info 2.0+155+gf4e7cbc-1 - if mime_type in ["application/CDFV2", "application/x-ole-storage"]: + if detected_mime_type in ["application/CDFV2", "application/x-ole-storage"]: whole_file = head + value.read() - mime_type = magic.from_buffer(whole_file, mime=True) + detected_mime_type = magic.from_buffer(whole_file, mime=True) - if mime_type == "image/heif": - mime_type = "image/heic" + if detected_mime_type == "image/heif": + detected_mime_type = "image/heic" if not ( self.any_allowed - or mimetype_allowed(mime_type, self._regular_mimes, self._wildcard_mimes) + or mimetype_allowed( + detected_mime_type, self._regular_mimes, self._wildcard_mimes + ) ): raise serializers.ValidationError( _("The provided file is not a valid file type.") ) + if not ext: + raise serializers.ValidationError( + _( + "Could not determine the file type. Please make sure the file name " + "has an extension." + ) + ) + # Contents is allowed. Do extension or submitted content_type agree? - if value.content_type == "application/octet-stream": - m = magic.Magic(extension=True) + if provided_mime_type == "application/octet-stream": + m = magic.Magic(extension=True) # pyright: ignore[reportCallIssue] extensions = m.from_buffer(head).split("/") # magic db doesn't know any more specific extension(s), so accept the # file @@ -101,27 +113,26 @@ def __call__(self, value: UploadedFile) -> None: # If the file does not strictly follow the conventions of CSV (e.g. non-standard delimiters), # may not be considered as a valid CSV. elif ( - value.content_type == "text/csv" - and mime_type == "text/plain" + provided_mime_type == "text/csv" + and detected_mime_type == "text/plain" and ext == "csv" ): return - elif mime_type == "image/heic" and value.content_type in ( + elif detected_mime_type == "image/heic" and provided_mime_type in ( "image/heic", "image/heif", ): return - # gh #4658 # Windows use application/x-zip-compressed as a mimetype for .zip files, which # is deprecated but still we need to support it. Instead, the common case for # zip files is application/zip or application/zip-compressed mimetype. - elif mime_type == "application/zip" and value.content_type in ( + elif detected_mime_type == "application/zip" and provided_mime_type in ( "application/zip-compressed", "application/x-zip-compressed", ): return - elif mime_type != value.content_type: + elif provided_mime_type != detected_mime_type: raise serializers.ValidationError( _("The provided file is not a {file_type}.").format( filename=value.name, file_type=f".{ext}" diff --git a/src/openforms/formio/components/vanilla.py b/src/openforms/formio/components/vanilla.py index 109cb0560d..882705cff5 100644 --- a/src/openforms/formio/components/vanilla.py +++ b/src/openforms/formio/components/vanilla.py @@ -338,14 +338,7 @@ class FileSerializer(serializers.Serializer): originalName = serializers.CharField(trim_whitespace=False) size = serializers.IntegerField(min_value=0) storage = serializers.ChoiceField(choices=["url"]) - type = serializers.CharField( - error_messages={ - "blank": _( - "Could not determine the file type. Please make sure the file name " - "has an extension." - ), - } - ) + type = serializers.CharField(required=True, allow_blank=True) url = serializers.URLField() data = FileDataSerializer() # type: ignore From 929f96117e4d90268203db233385de8a35934023 Mon Sep 17 00:00:00 2001 From: robinvandermolen Date: Mon, 23 Dec 2024 09:57:19 +0100 Subject: [PATCH 3/6] :white_check_mark: [#4795] Add e2e test for msg file upload --- src/openforms/tests/e2e/data/test.msg | Bin 0 -> 11264 bytes src/openforms/tests/e2e/test_file_upload.py | 61 ++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 src/openforms/tests/e2e/data/test.msg diff --git a/src/openforms/tests/e2e/data/test.msg b/src/openforms/tests/e2e/data/test.msg new file mode 100644 index 0000000000000000000000000000000000000000..b597796de62bb5b61b4584d863f83cb1cf44672c GIT binary patch literal 11264 zcmeHNO>7&-6`ti%f0XqX)pqR0Y6>Cd|&G%a($OuHp^yNiBL^?8k9P0fl`}FR|IVnZPLgFSezxwJM;1h7Sh@&4?Tt0 zt2FtqB2Fb^?l1{wmU_>zB|=+Z1!Y{>=@F)Ba=H)7!ix0zzaoNfnU+@r-)75fj-C#? z?D08EyrwA*QA|)-y8M-G`A3L<;P0bSq}wWM5Mcc)UR{{d+j!9nEkr|>Z4@Eb4vLsp ztZ__P7sYOh_fe!Rfb~&CT|cGh6ZxI+<&nVMT>tWXAT_8OUU^0n>p-3tXmmH$f4Q`) z0#_84<5yHQL>2nX zs`z`UEo>VEKm0*-AhFDT(Ft$z)6DQr?z{$aDK%>PIL`#T8#u$y6bBlyXGgbgd{AN(oU>#F?2 zo|pE&AHJ*o_4gmiyDGl)A7~i9{GU+NKgbh0rHYTWvWFsqKmREu`3L16d({sGx*z@| zaj&%h3Lgymq{=^ZzyA6IzS{m9lz-`4LIkzt4}8eK?*1cm8GE`a|L||??|;CD4_;UQ z1+J6Z2H{!^tt%t+3E5|KKaw;e37TjrY7NqM`N$P z^NZenKP|F9X+1OKPHAhW>DGOoZpJyC!6x~~jAhw0n_*z~3f+^V%w}U0O>uW_6K#2S z_Hpv2QK}MBImZ~~*_?BX4964j+Rsd^ zt9RDya;hc>b#$85CiPcNNf6A+w)ay1nq7bQdjC9?nT|hYIt{P?9x?#lrg-MrspU6g zFTSSJO^TH-T^>jd|0StUI>vWf#x-5LwGmkhy{doK)*o8y`3K8{&!236y6vsL!`S6mIUpVhX%r^x$&|54Zf(qO|$%fnY# zUS*t8%gfiomp9J}{+9;3680p<;C1Vp9H&~?s|f!1YWGi)cUAmjgbN=G!5?2rA?-G< z@}&D$274OzwJQFj)K-oE|Be6m2;svoK&ZukLTKNJivDfto+|%G3Adj7-Ftq_e_j0# zj1ON$Fz?GhMu7RRi?6|dIz&;Ge|3C(e^3{{DgLD#nn}U849)b@=(`ddX`_8K^WNRS zClf#fnac5uv9<&`1Wf*RHOAVv(K@@;gL)Zs_D?~}vM398Bk%r3*iUf4i-~$EFP-%aq5k$s^oTLGZL5#617dz$`9U7v6yta2??#xl zy!5envB8J1zq8|K$p4)w#XtPSe>RXx=rUN4Pik)~%qo g^ibbcFZB-Ryt}Tx5dGoY1x instead of openforms/static/sdk + @sync_to_async + def setUpTestData(): + # set up a form + form = FormFactory.create( + name="Form with file upload", + slug="form-with-file-upload", + generate_minimal_setup=True, + formstep__form_definition__name="First step", + formstep__form_definition__slug="first-step", + formstep__form_definition__configuration={ + "components": [ + { + "type": "file", + "key": "fileUpload", + "label": "File Upload", + "storage": "url", + "validate": { + "required": True, + }, + } + ] + }, + translation_enabled=False, # force Dutch + ask_privacy_consent=False, + ask_statement_of_truth=False, + ) + return form + + form = await setUpTestData() + form_url = str( + furl(self.live_server_url) + / reverse("forms:form-detail", kwargs={"slug": form.slug}) + ) + + with patch("openforms.utils.validators.allow_redirect_url", return_value=True): + async with browser_page() as page: + await page.goto(form_url) + + await page.get_by_role("button", name="Formulier starten").click() + + async with page.expect_file_chooser() as fc_info: + await page.get_by_text("blader").click() + + file_chooser = await fc_info.value + await file_chooser.set_files(TEST_FILES / "test.msg") + + await page.wait_for_load_state("networkidle") + + uploaded_file = page.get_by_role("link", name="test.msg") + await expect(uploaded_file).to_be_visible() + + await page.get_by_role("button", name="Volgende").click() + await page.get_by_role("button", name="Verzenden").click() + await expect( + page.get_by_text("Een moment geduld", exact=False) + ).to_be_visible() From f842798c5a147b03f028c4d0e7cd05b6bf557cd4 Mon Sep 17 00:00:00 2001 From: robinvandermolen Date: Mon, 23 Dec 2024 11:27:47 +0100 Subject: [PATCH 4/6] :white_check_mark: [#4795] Fix tests The validator now rejects (temporary) file uploads that don't have an extension, as this prevents validating that the extension and content type match. This used to pass and would then be caught later when linking the file upload component and temporary file upload. --- src/openforms/formio/tests/test_validators.py | 17 +++++++++++++++++ .../formio/tests/validation/test_file.py | 4 ++-- .../tests/e2e/test_input_validation.py | 6 +++--- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/openforms/formio/tests/test_validators.py b/src/openforms/formio/tests/test_validators.py index 728545cc05..e9965b6076 100644 --- a/src/openforms/formio/tests/test_validators.py +++ b/src/openforms/formio/tests/test_validators.py @@ -108,6 +108,23 @@ def test_mime_type_inferred_from_magic(self): except ValidationError as e: self.fail(f"Valid file failed validation: {e}") + def test_unknown_file_type(self): + file = SimpleUploadedFile( + "unknown-type", + b"test", + content_type="application/octet-stream", # see e2e test SingleFileTests.test_unknown_file_type + ) + validator = validators.MimeTypeValidator( + allowed_mime_types=None + ) # allows any mime type + + with self.assertRaisesMessage( + ValidationError, + "Could not determine the file type. Please make sure the file name " + "has an extension.", + ): + validator(file) + def test_star_wildcard_in_allowed_mimetypes(self): validator = validators.MimeTypeValidator({"*"}) diff --git a/src/openforms/formio/tests/validation/test_file.py b/src/openforms/formio/tests/validation/test_file.py index c656687d5f..61470f6696 100644 --- a/src/openforms/formio/tests/validation/test_file.py +++ b/src/openforms/formio/tests/validation/test_file.py @@ -602,10 +602,10 @@ def test_attach_upload_validates_unknown_file_type(self): } is_valid, errors = validate_formio_data(component, data, submission=submission) - error = extract_error(errors["foo"][0], "type") + error = extract_error(errors["foo"][0], "non_field_errors") self.assertFalse(is_valid) - self.assertEqual(error.code, "blank") + self.assertEqual(error.code, "invalid") self.assertEqual( error, _( diff --git a/src/openforms/tests/e2e/test_input_validation.py b/src/openforms/tests/e2e/test_input_validation.py index 8dbc9f4830..ba0320fe22 100644 --- a/src/openforms/tests/e2e/test_input_validation.py +++ b/src/openforms/tests/e2e/test_input_validation.py @@ -940,7 +940,7 @@ def test_unknown_file_type(self): # The frontend validation will *not* create a TemporaryFileUpload, # as the frontend will block the upload because of the invalid file type. # However the user could do an handcrafted API call. - # For this reason, we manually create an invalid TemporaryFileUpload + # For this reason, we manually try to create an invalid TemporaryFileUpload # and use it for the `api_value`: with open(TEST_FILES / "unknown-type", "rb") as infile: @@ -966,8 +966,8 @@ def test_unknown_file_type(self): ], ) - # Make sure the frontend did not create one: - self.assertEqual(TemporaryFileUpload.objects.count(), 1) + # Make sure that no temporary files were created + self.assertEqual(TemporaryFileUpload.objects.count(), 0) class SingleAddressNLTests(ValidationsTestCase): From 0f179f59285c94b21ebea65793c36c0097e62fe0 Mon Sep 17 00:00:00 2001 From: Sergei Maertens Date: Mon, 30 Dec 2024 12:13:30 +0100 Subject: [PATCH 5/6] :pencil2: Fix typo/spelling --- src/openforms/tests/e2e/test_input_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openforms/tests/e2e/test_input_validation.py b/src/openforms/tests/e2e/test_input_validation.py index ba0320fe22..79255f447d 100644 --- a/src/openforms/tests/e2e/test_input_validation.py +++ b/src/openforms/tests/e2e/test_input_validation.py @@ -939,7 +939,7 @@ def test_unknown_file_type(self): # The frontend validation will *not* create a TemporaryFileUpload, # as the frontend will block the upload because of the invalid file type. - # However the user could do an handcrafted API call. + # However the user could do a handcrafted API call. # For this reason, we manually try to create an invalid TemporaryFileUpload # and use it for the `api_value`: From f3bf8417020c42623888b32e53c014e1076b8aab Mon Sep 17 00:00:00 2001 From: Sergei Maertens Date: Mon, 30 Dec 2024 12:42:37 +0100 Subject: [PATCH 6/6] :pencil2: Fix typo in Dutch translation --- src/openforms/conf/locale/nl/LC_MESSAGES/django.po | 2 +- src/openforms/tests/e2e/test_input_validation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/openforms/conf/locale/nl/LC_MESSAGES/django.po b/src/openforms/conf/locale/nl/LC_MESSAGES/django.po index 4cdc9abeef..5f4855401f 100644 --- a/src/openforms/conf/locale/nl/LC_MESSAGES/django.po +++ b/src/openforms/conf/locale/nl/LC_MESSAGES/django.po @@ -5384,7 +5384,7 @@ msgid "" "extension." msgstr "" "Het bestandstype kon niet bepaald worden. Controleer of de bestandsnaam met " -"een extensie eindigt (bijvoorbeel '.pdf' of '.png')." +"een extensie eindigt (bijvoorbeeld '.pdf' of '.png')." #: openforms/formio/components/vanilla.py:365 #, python-brace-format diff --git a/src/openforms/tests/e2e/test_input_validation.py b/src/openforms/tests/e2e/test_input_validation.py index 79255f447d..0deb90fa9a 100644 --- a/src/openforms/tests/e2e/test_input_validation.py +++ b/src/openforms/tests/e2e/test_input_validation.py @@ -955,7 +955,7 @@ def test_unknown_file_type(self): ui_files=[TEST_FILES / "unknown-type"], expected_ui_error=( "Het bestandstype kon niet bepaald worden. Controleer of de " - "bestandsnaam met een extensie eindigt (bijvoorbeel '.pdf' of " + "bestandsnaam met een extensie eindigt (bijvoorbeeld '.pdf' of " "'.png')." ), api_value=[