diff --git a/integration_tests/test_flask_json_response_type.py b/integration_tests/test_flask_json_response_type.py
new file mode 100644
index 000000000..adc196a88
--- /dev/null
+++ b/integration_tests/test_flask_json_response_type.py
@@ -0,0 +1,36 @@
+from core_codemods.flask_json_response_type import FlaskJsonResponseType
+from integration_tests.base_test import (
+    BaseIntegrationTest,
+    original_and_expected_from_code_path,
+)
+
+
+class TestFlaskJsonResponseType(BaseIntegrationTest):
+    codemod = FlaskJsonResponseType
+    code_path = "tests/samples/flask_json_response_type.py"
+    original_code, expected_new_code = original_and_expected_from_code_path(
+        code_path,
+        [
+            (
+                5,
+                """    return make_response(json_response, {"Content-Type": "application/json"})\n""",
+            ),
+        ],
+    )
+
+    # fmt: off
+    expected_diff =(
+    """--- \n"""
+    """+++ \n"""
+    """@@ -3,4 +3,4 @@\n"""
+    """ \n"""
+    """ def foo(request):\n"""
+    """     json_response = json.dumps({ "user_input": request.GET.get("input") })\n"""
+    """-    return make_response(json_response)\n"""
+    """+    return make_response(json_response, {"Content-Type": "application/json"})\n"""
+    )
+    # fmt: on
+
+    expected_line_change = "6"
+    change_description = FlaskJsonResponseType.CHANGE_DESCRIPTION
+    num_changed_files = 1
diff --git a/src/codemodder/scripts/generate_docs.py b/src/codemodder/scripts/generate_docs.py
index 041945592..6fa85ee8e 100644
--- a/src/codemodder/scripts/generate_docs.py
+++ b/src/codemodder/scripts/generate_docs.py
@@ -154,6 +154,10 @@ class DocMetadata:
         importance="Medium",
         guidance_explained="This change will only restrict the response type and will not alter the response data itself. Thus we deem it safe.",
     ),
+    "flask-json-response-type": DocMetadata(
+        importance="Medium",
+        guidance_explained="This change will only restrict the response type and will not alter the response data itself. Thus we deem it safe.",
+    ),
 }
 
 
diff --git a/src/core_codemods/__init__.py b/src/core_codemods/__init__.py
index 1b1f6a135..113275d81 100644
--- a/src/core_codemods/__init__.py
+++ b/src/core_codemods/__init__.py
@@ -32,6 +32,7 @@
 from .file_resource_leak import FileResourceLeak
 from .django_receiver_on_top import DjangoReceiverOnTop
 from .django_json_response_type import DjangoJsonResponseType
+from .flask_json_response_type import FlaskJsonResponseType
 
 registry = CodemodCollection(
     origin="pixee",
@@ -70,5 +71,6 @@
         DjangoReceiverOnTop,
         NumpyNanEquality,
         DjangoJsonResponseType,
+        FlaskJsonResponseType,
     ],
 )
diff --git a/src/core_codemods/docs/pixee_python_flask-json-response-type.md b/src/core_codemods/docs/pixee_python_flask-json-response-type.md
new file mode 100644
index 000000000..789bf8146
--- /dev/null
+++ b/src/core_codemods/docs/pixee_python_flask-json-response-type.md
@@ -0,0 +1,13 @@
+The default `mimetype` for `make_response` in Flask is `'text/html'`. This is true even when the response contains JSON data.
+If the JSON contains (unsanitized) user-supplied input, a malicious user may supply HTML code which leaves the application vulnerable to cross-site scripting (XSS).
+This fix explicitly sets the response type to `application/json` when the response body is JSON data to avoid this vulnerability. Our changes look something like this:
+
+```diff
+from flask import make_response
+import json
+
+def foo(request):
+    json_response = json.dumps({ "user_input": request.GET.get("input") })
+-    return make_response(json_response)
++    return make_response(json_response, {"Content-Type": "application/json"})
+```
diff --git a/src/core_codemods/flask_json_response_type.py b/src/core_codemods/flask_json_response_type.py
new file mode 100644
index 000000000..4c3e5d22b
--- /dev/null
+++ b/src/core_codemods/flask_json_response_type.py
@@ -0,0 +1,60 @@
+import libcst as cst
+
+from codemodder.codemods.base_codemod import ReviewGuidance
+from codemodder.codemods.api import SemgrepCodemod
+
+
+class FlaskJsonResponseType(SemgrepCodemod):
+    """Add a JSON `Content-Type` header to `flask.make_response(json.dumps(...))` calls.
+
+    `flask.make_response` takes positional arguments only, so the content type
+    is passed as a headers mapping in second position; a `mimetype=` keyword
+    would raise `TypeError` when the rewritten code runs.
+    """
+
+    NAME = "flask-json-response-type"
+    SUMMARY = "Set content type to `application/json` for `flask.make_response` with JSON data"
+    REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW
+    DESCRIPTION = "Sets the `Content-Type` header to `application/json`."
+    REFERENCES = [
+        {
+            "url": "https://tedboy.github.io/flask/generated/flask.jsonify.html",
+            "description": "",
+        },
+        {
+            "url": "https://cheatsheetseries.owasp.org/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.html#output-encoding-for-javascript-contexts",
+            "description": "",
+        },
+    ]
+
+    @classmethod
+    def rule(cls):
+        """Return the Semgrep taint rule that locates the calls to rewrite.
+
+        Only single-argument calls are matched: when a status code or headers
+        argument is already present, appending another positional argument
+        could change the meaning of the call.
+        """
+        return """
+            rules:
+              - id: flask-json-response-type
+                mode: taint
+                pattern-sources:
+                  - pattern: json.dumps(...)
+                pattern-sinks:
+                  - pattern: flask.make_response($BODY)
+        """
+
+    def on_result_found(self, _, updated_node):
+        """Append a `Content-Type: application/json` headers mapping to the call."""
+        return self.update_arg_target(
+            updated_node,
+            [
+                *updated_node.args,
+                cst.Arg(
+                    value=cst.parse_expression(
+                        '{"Content-Type": "application/json"}'
+                    ),
+                ),
+            ],
+        )
diff --git a/tests/codemods/test_django_json_response_type.py b/tests/codemods/test_django_json_response_type.py
index 294870009..924ca4dda 100644
--- a/tests/codemods/test_django_json_response_type.py
+++ b/tests/codemods/test_django_json_response_type.py
@@ -85,7 +85,7 @@ def test_no_json_input(self, tmpdir):
         import json
 
         def foo(request):
-            dict_reponse = { "user_input": request.GET.get("input") }
+            dict_response = { "user_input": request.GET.get("input") }
             return HttpResponse(dict_response)
         """
         self.run_and_assert(tmpdir, dedent(input_code), dedent(input_code))
diff --git a/tests/codemods/test_flask_json_response_type.py b/tests/codemods/test_flask_json_response_type.py
new file mode 100644
index 000000000..7ca1efcc7
--- /dev/null
+++ b/tests/codemods/test_flask_json_response_type.py
@@ -0,0 +1,92 @@
+from core_codemods.flask_json_response_type import FlaskJsonResponseType
+from tests.codemods.base_codemod_test import BaseSemgrepCodemodTest
+from textwrap import dedent
+
+
+class TestFlaskJsonResponseType(BaseSemgrepCodemodTest):
+    codemod = FlaskJsonResponseType
+
+    def test_name(self):
+        assert self.codemod.name() == "flask-json-response-type"
+
+    def test_simple(self, tmpdir):
+        input_code = """\
+        from flask import make_response
+        import json
+
+        def foo(request):
+            json_response = json.dumps({ "user_input": request.GET.get("input") })
+            return make_response(json_response)
+        """
+        expected = """\
+        from flask import make_response
+        import json
+
+        def foo(request):
+            json_response = json.dumps({ "user_input": request.GET.get("input") })
+            return make_response(json_response, {"Content-Type": "application/json"})
+        """
+        self.run_and_assert(tmpdir, dedent(input_code), dedent(expected))
+        assert len(self.file_context.codemod_changes) == 1
+
+    def test_alias(self, tmpdir):
+        input_code = """\
+        from flask import make_response as response
+        import json
+
+        def foo(request):
+            json_response = json.dumps({ "user_input": request.GET.get("input") })
+            return response(json_response)
+        """
+        expected = """\
+        from flask import make_response as response
+        import json
+
+        def foo(request):
+            json_response = json.dumps({ "user_input": request.GET.get("input") })
+            return response(json_response, {"Content-Type": "application/json"})
+        """
+        self.run_and_assert(tmpdir, dedent(input_code), dedent(expected))
+        assert len(self.file_context.codemod_changes) == 1
+
+    def test_direct(self, tmpdir):
+        input_code = """\
+        from flask import make_response
+        import json
+
+        def foo(request):
+            return make_response(json.dumps({ "user_input": request.GET.get("input") }))
+        """
+        expected = """\
+        from flask import make_response
+        import json
+
+        def foo(request):
+            return make_response(json.dumps({ "user_input": request.GET.get("input") }), {"Content-Type": "application/json"})
+        """
+        self.run_and_assert(tmpdir, dedent(input_code), dedent(expected))
+        assert len(self.file_context.codemod_changes) == 1
+
+    def test_content_type_set(self, tmpdir):
+        input_code = """\
+        from flask import make_response
+        import json
+
+        def foo(request):
+            json_response = json.dumps({ "user_input": request.GET.get("input") })
+            return make_response(json_response, {"Content-Type": "application/json"})
+        """
+        self.run_and_assert(tmpdir, dedent(input_code), dedent(input_code))
+        assert len(self.file_context.codemod_changes) == 0
+
+    def test_no_json_input(self, tmpdir):
+        input_code = """\
+        from flask import make_response
+        import json
+
+        def foo(request):
+            dict_response = { "user_input": request.GET.get("input") }
+            return make_response(dict_response)
+        """
+        self.run_and_assert(tmpdir, dedent(input_code), dedent(input_code))
+        assert len(self.file_context.codemod_changes) == 0
diff --git a/tests/samples/flask_json_response_type.py b/tests/samples/flask_json_response_type.py
new file mode 100644
index 000000000..8d488720a
--- /dev/null
+++ b/tests/samples/flask_json_response_type.py
@@ -0,0 +1,6 @@
+from flask import make_response
+import json
+
+def foo(request):
+    json_response = json.dumps({ "user_input": request.GET.get("input") })
+    return make_response(json_response)