Skip to content

Commit

Permalink
flask-json-response-type codemod with tests and docs
Browse files Browse the repository at this point in the history
  • Loading branch information
andrecsilva committed Dec 11, 2023
1 parent 44bccc1 commit d543abf
Show file tree
Hide file tree
Showing 9 changed files with 544 additions and 2 deletions.
36 changes: 36 additions & 0 deletions integration_tests/test_flask_json_response_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from core_codemods.flask_json_response_type import FlaskJsonResponseType
from integration_tests.base_test import (
BaseIntegrationTest,
original_and_expected_from_code_path,
)


class TestFlaskJsonResponseType(BaseIntegrationTest):
codemod = FlaskJsonResponseType
code_path = "tests/samples/flask_json_response_type.py"
original_code, expected_new_code = original_and_expected_from_code_path(
code_path,
[
(
8,
""" return (make_response(json_response), {'Content-Type': 'application/json'})\n""",
),
],
)

# fmt: off
expected_diff =(
"""--- \n"""
"""+++ \n"""
"""@@ -6,4 +6,4 @@\n"""
""" @app.route("/test")\n"""
""" def foo(request):\n"""
""" json_response = json.dumps({ "user_input": request.GET.get("input") })\n"""
"""- return make_response(json_response)\n"""
"""+ return (make_response(json_response), {'Content-Type': 'application/json'})\n"""
)
# fmt: on

expected_line_change = "9"
change_description = FlaskJsonResponseType.CHANGE_DESCRIPTION
num_changed_files = 1
70 changes: 70 additions & 0 deletions src/codemodder/codemods/utils_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,20 @@ def is_value_of_assignment(
return parent
return None

def is_target_of_assignment(
self, expr
) -> Optional[cst.AnnAssign | cst.Assign | cst.WithItem | cst.NamedExpr]:
"""
Tests if expr is the value in an assignment.
"""
parent = self.get_metadata(ParentNodeProvider, expr)
parent = parent if isinstance(parent, cst.AssignTarget) else None
gparent = self.get_metadata(ParentNodeProvider, parent) if parent else None
match gparent:
case cst.AnnAssign() | cst.Assign() | cst.WithItem() | cst.NamedExpr():
return gparent
return None

def has_attr_called(self, node: cst.CSTNode) -> Optional[cst.Name]:
"""
Checks if node is part of an expression of the form: <node>.call().
Expand Down Expand Up @@ -298,6 +312,21 @@ def is_attribute_value(self, node: cst.CSTNode) -> Optional[cst.Attribute]:
return maybe_parent
return None

def find_immediate_function_def(
self, node: cst.CSTNode
) -> Optional[cst.FunctionDef]:
"""
Find if node is inside a function definition. In case of nested functions it returns the most immediate one.
"""
# We disregard nested functions, we consider only the immediate one
ancestors = self.path_to_root(node)
first_fdef = None
for ancestor in ancestors:
if isinstance(ancestor, cst.FunctionDef):
first_fdef = ancestor
break
return first_fdef

def path_to_root(self, node: cst.CSTNode) -> list[cst.CSTNode]:
"""
Returns node's path to root. Includes self.
Expand Down Expand Up @@ -338,6 +367,47 @@ def get_parent(self, node: cst.CSTNode) -> Optional[cst.CSTNode]:
return None


class NameAndAncestorResolutionMixin(NameResolutionMixin, AncestorPatternsMixin):
METADATA_DEPENDENCIES: Tuple[Any, ...] = (
ScopeProvider,
ParentNodeProvider,
)

def extract_value(self, node: cst.AnnAssign | cst.Assign | cst.WithItem):
match node:
case cst.AnnAssign(value=value) | cst.Assign(value=value) | cst.WithItem(
item=value
) | cst.NamedExpr(value=value):
return value
return None

def resolve_expression(self, node: cst.BaseExpression) -> cst.BaseExpression:
"""
If the expression is a Name, transitively resolves the name to another type of expression. Otherwise returns self.
"""
maybe_expr = None
match node:
case cst.Name():
maybe_expr = self._resolve_name_transitive(node)
if maybe_expr:
return maybe_expr
return node

def _resolve_name_transitive(self, node: cst.Name) -> Optional[cst.BaseExpression]:
maybe_assignment = self.find_single_assignment(node)
if maybe_assignment:
if maybe_target_assignment := self.is_target_of_assignment(
maybe_assignment.node
):
value = self.extract_value(maybe_target_assignment)
match value:
case cst.Name():
return self._resolve_name_transitive(value)
case _:
return value
return None


def iterate_left_expressions(node: cst.BaseExpression):
yield node
match node:
Expand Down
4 changes: 4 additions & 0 deletions src/codemodder/scripts/generate_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ class DocMetadata:
importance="Low",
guidance_explained="This change fixes deprecated uses and is safe.",
),
"flask-json-response-type": DocMetadata(
importance="Medium",
guidance_explained="This change will only restrict the response type and will not alter the response data itself. Thus we deem it safe.",
),
}


Expand Down
2 changes: 2 additions & 0 deletions src/core_codemods/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from .file_resource_leak import FileResourceLeak
from .django_receiver_on_top import DjangoReceiverOnTop
from .django_json_response_type import DjangoJsonResponseType
from .flask_json_response_type import FlaskJsonResponseType

registry = CodemodCollection(
origin="pixee",
Expand Down Expand Up @@ -72,5 +73,6 @@
DjangoReceiverOnTop,
NumpyNanEquality,
DjangoJsonResponseType,
FlaskJsonResponseType,
],
)
4 changes: 2 additions & 2 deletions src/core_codemods/django_json_response_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

class DjangoJsonResponseType(SemgrepCodemod):
NAME = "django-json-response-type"
SUMMARY = "Set content type to `json/application` for `django.http.HttpResponse` with JSON data"
SUMMARY = "Set content type to `application/json` for `django.http.HttpResponse` with JSON data"
REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW
DESCRIPTION = "Sets `content_type` to `json/application`."
DESCRIPTION = "Sets `content_type` to `application/json`."
REFERENCES = [
{
"url": "https://docs.djangoproject.com/en/4.0/ref/request-response/#django.http.HttpResponse.__init__",
Expand Down
16 changes: 16 additions & 0 deletions src/core_codemods/docs/pixee_python_flask-json-response-type.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
The default `mimetype` for `make_response` in Flask is `'text/html'`. This is true even when the response contains JSON data.
If the JSON contains (unsanitized) user-supplied input, a malicious user may supply HTML code which leaves the application vulnerable to cross-site scripting (XSS).
This fix explicitly sets the response type to `application/json` when the response body is JSON data to avoid this vulnerability. Our changes look something like this:

```diff
from flask import make_response, Flask
import json

app = Flask(__name__)

@app.route("/test")
def foo(request):
json_response = json.dumps({ "user_input": request.GET.get("input") })
- return make_response(json_response)
+ return (make_response(json_response), {'Content-Type':'application/json'}
```
193 changes: 193 additions & 0 deletions src/core_codemods/flask_json_response_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
from typing import Optional, Union
import libcst as cst
from codemodder.codemods.api import BaseCodemod

from codemodder.codemods.base_codemod import ReviewGuidance

from codemodder.codemods.utils_mixin import NameAndAncestorResolutionMixin


class FlaskJsonResponseType(BaseCodemod, NameAndAncestorResolutionMixin):
NAME = "flask-json-response-type"
SUMMARY = "Set content type to `application/json` for `flask.make_response` with JSON data"
REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW
DESCRIPTION = "Sets `mimetype` to `application/json`."
REFERENCES = [
{
"url": "https://flask.palletsprojects.com/en/2.3.x/patterns/javascript/#return-json-from-views",
"description": "",
},
{
"url": "https://cheatsheetseries.owasp.org/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.html#output-encoding-for-javascript-contexts",
"description": "",
},
]

content_type_key = "Content-Type"
json_content_type = "application/json"

def leave_Return(
self, original_node: cst.Return, updated_node: cst.Return
) -> Union[
cst.BaseSmallStatement,
cst.FlattenSentinel[cst.BaseSmallStatement],
cst.RemovalSentinel,
]:
if original_node.value:
# is inside a function def with a route decorator
maybe_function_def = self.find_immediate_function_def(original_node)
maybe_has_decorator = (
self._has_route_decorator(maybe_function_def)
if maybe_function_def
else None
)
if maybe_has_decorator:
# json.dumps(...)
maybe_json_dumps = self._is_json_dumps_call(original_node.value)
if maybe_json_dumps:
self.add_change(original_node, self.CHANGE_DESCRIPTION)
return updated_node.with_changes(
value=self._wrap_into_tuple_with_content_type(
original_node.value
)
)

# make_response(json.dumps(...),...)
maybe_make_response = self.is_make_response_with_json(
original_node.value
)
if maybe_make_response:
self.add_change(original_node, self.CHANGE_DESCRIPTION)
return updated_node.with_changes(
value=self._wrap_into_tuple_with_content_type(
original_node.value
)
)

# tuple case
match original_node.value:
case cst.Tuple():
maybe_fixed_tuple = self._fix_response_tuple(
original_node.value
)
if maybe_fixed_tuple:
self.add_change(original_node, self.CHANGE_DESCRIPTION)
return updated_node.with_changes(value=maybe_fixed_tuple)
return updated_node

def _is_string_or_int(self, node: cst.BaseExpression):
expr = self.resolve_expression(node)
if expr and isinstance(expr, cst.SimpleString | cst.Integer):
return True
return False

def _fix_response_tuple(self, node: cst.Tuple) -> Optional[cst.Tuple]:
elements = list(node.elements)
# (make_response | json.dumps, ..., {...})
if len(elements) == 3:
last = elements[-1].value
match last:
case cst.Dict() if not self._has_content_type_key(last):
elements[-1] = cst.Element(
self._add_key_value(
last,
cst.SimpleString(f"'{self.content_type_key}'"),
cst.SimpleString(f"'{self.json_content_type}'"),
)
)
return node.with_changes(elements=elements)
# (make_response | json.dumps, string|number)
# (make_response | json.dumps, {...})
if len(elements) == 2:
last = elements[-1].value
expr = self.resolve_expression(last)
match expr:
case cst.Dict() if not self._has_content_type_key(expr):
if last == expr:
elements[-1] = cst.Element(
self._add_key_value(
expr,
cst.SimpleString(f"'{self.content_type_key}'"),
cst.SimpleString(f"'{self.json_content_type}'"),
)
)
return node.with_changes(elements=elements)
# TODO last != expr case.
case cst.Integer() | cst.SimpleString():
elements.append(cst.Element(self._build_dict()))
return node.with_changes(elements=elements)
return None

def _wrap_into_tuple_with_content_type(self, node: cst.BaseExpression) -> cst.Tuple:
return cst.Tuple([cst.Element(node), cst.Element(self._build_dict())])

def _build_dict(self) -> cst.Dict:
return cst.Dict(
[
cst.DictElement(
cst.SimpleString(f"'{self.content_type_key}'"),
cst.SimpleString(f"'{self.json_content_type}'"),
)
]
)

def _has_route_decorator(self, node: cst.FunctionDef) -> bool:
# We cannot guarantee that this decorator originates from a flask app object
# thus we just check for the name
for decorator in node.decorators:
match decorator.decorator:
case cst.Call(func=cst.Attribute() as func):
if func.attr.value == "route":
return True
return False

def _is_json_dumps_call(self, node: cst.BaseExpression) -> Optional[cst.Call]:
expr = node
match node:
case cst.Name():
expr = self._resolve_name_transitive(node)
match expr:
case cst.Call():
true_name = self.find_base_name(expr)
if true_name == "json.dumps":
return expr
return None

def is_make_response_with_json(
self, node: cst.BaseExpression
) -> Optional[cst.Call]:
expr = node
match node:
case cst.Name():
expr = self._resolve_name_transitive(node)
match expr:
case cst.Call():
true_name = self.find_base_name(expr)
if true_name != "flask.make_response":
return None
first_arg = expr.args[0].value if expr.args else None
if first_arg and self._is_json_dumps_call(first_arg):
return expr
return None

def _has_content_type_key(self, dict_expr: cst.Dict):
for element in dict_expr.elements:
match element:
case cst.StarredDictElement():
return True
case cst.DictElement(key=key):
match key:
case cst.SimpleString():
if key.raw_value == self.content_type_key:
return True
# it may use variable or other expreesions that resolves to Content-Type
case _:
return True
return False

def _add_key_value(
self, dict_expr: cst.Dict, key: cst.BaseExpression, value: cst.BaseExpression
) -> cst.Dict:
elements = list(dict_expr.elements)
elements.append(cst.DictElement(key, value))
return dict_expr.with_changes(elements=elements)
Loading

0 comments on commit d543abf

Please sign in to comment.