Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

File Resource Leak Codemod #147

Merged
merged 7 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions integration_tests/test_file_resource_leak.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from core_codemods.file_resource_leak import FileResourceLeak
from integration_tests.base_test import (
BaseIntegrationTest,
original_and_expected_from_code_path,
)


class TestFileResourceLeak(BaseIntegrationTest):
codemod = FileResourceLeak
code_path = "tests/samples/file_resource_leak.py"
original_code, expected_new_code = original_and_expected_from_code_path(
code_path,
[
(2, """with open(path, 'w', encoding='utf-8') as file:\n"""),
(3, """ pass\n"""),
(4, """ file.write('Hello World')\n"""),
],
)

# fmt: off
expected_diff =(
"""--- \n"""
"""+++ \n"""
"""@@ -1,5 +1,5 @@\n"""
""" import tempfile\n"""
""" path = tempfile.NamedTemporaryFile().name\n"""
"""-file = open(path, 'w', encoding='utf-8')\n"""
"""-pass\n"""
"""-file.write('Hello World')\n"""
"""+with open(path, 'w', encoding='utf-8') as file:\n"""
"""+ pass\n"""
"""+ file.write('Hello World')\n""")
# fmt: on

expected_line_change = "3"
change_description = FileResourceLeak.CHANGE_DESCRIPTION
num_changed_files = 1
36 changes: 35 additions & 1 deletion src/codemodder/codemods/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
from pathlib import Path
from typing import Optional, Any

from libcst import matchers
from libcst import MetadataDependent, matchers
from libcst.codemod import CodemodContext
from libcst.matchers import MatcherDecoratableTransformer
import libcst as cst


Expand Down Expand Up @@ -104,6 +106,38 @@
return updated_node


class MetadataPreservingTransformer(
MatcherDecoratableTransformer, cst.MetadataDependent
):
"""
The CSTTransformer equivalent of ContextAwareVisitor. Will preserve metadata passed through a context. You should not chain more than one of these, otherwise metadata will not reflect the state of the tree.
"""

def __init__(self, context: CodemodContext) -> None:
MetadataDependent.__init__(self)
MatcherDecoratableTransformer.__init__(self)
self.context = context
dependencies = self.get_inherited_dependencies()
if dependencies:
wrapper = self.context.wrapper
if wrapper is None:
raise ValueError(

Check warning on line 124 in src/codemodder/codemods/utils.py

View check run for this annotation

Codecov / codecov/patch

src/codemodder/codemods/utils.py#L124

Added line #L124 was not covered by tests
f"Attempting to instantiate {self.__class__.__name__} outside of "
+ "an active transform. This means that metadata hasn't been "
+ "calculated and we cannot successfully create this visitor."
)
for dep in dependencies:
if dep not in wrapper._metadata:
raise ValueError(

Check warning on line 131 in src/codemodder/codemods/utils.py

View check run for this annotation

Codecov / codecov/patch

src/codemodder/codemods/utils.py#L131

Added line #L131 was not covered by tests
f"Attempting to access metadata {dep.__name__} that was not a "
+ "declared dependency of parent transform! This means it is "
+ "not possible to compute this value. Please ensure that all "
+ f"parent transforms of {self.__class__.__name__} declare "
+ f"{dep.__name__} as a metadata dependency."
)
self.metadata = {dep: wrapper._metadata[dep] for dep in dependencies}


def is_django_settings_file(file_path: Path):
if "settings.py" not in file_path.name:
return False
Expand Down
140 changes: 138 additions & 2 deletions src/codemodder/codemods/utils_mixin.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from typing import Any, Optional, Tuple, Union
from typing import Any, Collection, Optional, Tuple, Union
import libcst as cst
from libcst import MetadataDependent, matchers
from libcst.helpers import get_full_name_for_node
from libcst.metadata import (
Access,
Assignment,
BaseAssignment,
BuiltinAssignment,
ImportAssignment,
ParentNodeProvider,
ScopeProvider,
)
from libcst.metadata.scope_provider import GlobalScope
Expand Down Expand Up @@ -110,7 +112,6 @@
"""
scope = self.get_metadata(ScopeProvider, node)
if node in scope.accesses:
# pylint: disable=protected-access
return set(next(iter(scope.accesses[node])).referents)
return set()

Expand Down Expand Up @@ -159,6 +160,141 @@
return matchers.matches(node.func, matchers.Name())
return False

def find_accesses(self, node) -> Collection[Access]:
scope = self.get_metadata(ScopeProvider, node, None)
if scope:
return scope.accesses[node]
return {}

Check warning on line 167 in src/codemodder/codemods/utils_mixin.py

View check run for this annotation

Codecov / codecov/patch

src/codemodder/codemods/utils_mixin.py#L167

Added line #L167 was not covered by tests


class AncestorPatternsMixin(MetadataDependent):
METADATA_DEPENDENCIES: Tuple[Any, ...] = (ParentNodeProvider,)

def is_value_of_assignment(
self, expr
) -> Optional[cst.AnnAssign | cst.Assign | cst.WithItem | cst.NamedExpr]:
"""
Tests if expr is the value in an assignment.
"""
parent = self.get_metadata(ParentNodeProvider, expr)
match parent:
case cst.AnnAssign(value=value) | cst.Assign(value=value) | cst.WithItem(
item=value
) | cst.NamedExpr(
value=value
) if expr == value: # type: ignore
return parent
return None

def has_attr_called(self, node: cst.CSTNode) -> Optional[cst.Name]:
"""
Checks if node is part of an expression of the form: <node>.call().
"""
maybe_attr = self.is_attribute_value(node)
maybe_call = self.is_call_func(maybe_attr) if maybe_attr else None
if maybe_attr and maybe_call:
return maybe_attr.attr
return None

def is_argument_of_call(self, node: cst.CSTNode) -> Optional[cst.Arg]:
"""
Checks if the node is an argument of a call.
"""
maybe_parent = self.get_parent(node)
match maybe_parent:
case cst.Arg(value=node):
return maybe_parent
return None

def is_yield_value(self, node: cst.CSTNode) -> Optional[cst.Yield]:
"""
Checks if the node is the value of a Yield statement.
"""
maybe_parent = self.get_parent(node)
match maybe_parent:
case cst.Yield(value=node):
return maybe_parent
return None

def is_return_value(self, node: cst.CSTNode) -> Optional[cst.Return]:
"""
Checks if the node is the value of a Return statement.
"""
maybe_parent = self.get_parent(node)
match maybe_parent:
case cst.Return(value=node):
return maybe_parent
return None

def is_with_item(self, node: cst.CSTNode) -> Optional[cst.WithItem]:
"""
Checks if the node is the name of a WithItem.
"""
maybe_parent = self.get_parent(node)
match maybe_parent:
case cst.WithItem(item=node):
return maybe_parent
return None

def is_call_func(self, node: cst.CSTNode) -> Optional[cst.Call]:
"""
Checks if the node is the func of an Call.
"""
maybe_parent = self.get_parent(node)
match maybe_parent:
case cst.Call(func=node):
return maybe_parent
return None

Check warning on line 247 in src/codemodder/codemods/utils_mixin.py

View check run for this annotation

Codecov / codecov/patch

src/codemodder/codemods/utils_mixin.py#L247

Added line #L247 was not covered by tests

def is_attribute_value(self, node: cst.CSTNode) -> Optional[cst.Attribute]:
"""
Checks if node is the value of an Attribute.
"""
maybe_parent = self.get_parent(node)
match maybe_parent:
case cst.Attribute(value=node):
return maybe_parent
return None

def path_to_root(self, node: cst.CSTNode) -> list[cst.CSTNode]:
"""
Returns node's path to root. Includes self.
"""
path = []
maybe_parent = node
while maybe_parent:
path.append(maybe_parent)
maybe_parent = self.get_parent(maybe_parent)
return path

def path_to_root_as_set(self, node: cst.CSTNode) -> set[cst.CSTNode]:
"""
Returns the set of nodes in node's path to root. Includes self.
"""
path = set()
maybe_parent = node
while maybe_parent:
path.add(maybe_parent)
maybe_parent = self.get_parent(maybe_parent)
return path

def is_ancestor(self, node: cst.CSTNode, other_node: cst.CSTNode) -> bool:
"""
Tests if other_node is an ancestor of node in the CST.
"""
path = self.path_to_root_as_set(node)
return other_node in path

def get_parent(self, node: cst.CSTNode) -> Optional[cst.CSTNode]:
"""
Retrieves the parent of node. Will return None for the root.
"""
try:
return self.get_metadata(ParentNodeProvider, node, None)
except Exception:
pass
return None

Check warning on line 296 in src/codemodder/codemods/utils_mixin.py

View check run for this annotation

Codecov / codecov/patch

src/codemodder/codemods/utils_mixin.py#L294-L296

Added lines #L294 - L296 were not covered by tests


def iterate_left_expressions(node: cst.BaseExpression):
yield node
Expand Down
4 changes: 4 additions & 0 deletions src/codemodder/scripts/generate_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ class DocMetadata:
importance="Medium",
guidance_explained="Our change fixes explicitly insecure session configuration for a Flask application. However, there may be valid cases to use these insecure configurations, such as for testing or backward compatibility.",
),
"fix-file-resource-leak": DocMetadata(
importance="High",
guidance_explained="We believe this change is safe and will only close file resources that are not referenced outside of the with statement block.",
),
}


Expand Down
2 changes: 2 additions & 0 deletions src/core_codemods/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from .use_walrus_if import UseWalrusIf
from .with_threading_lock import WithThreadingLock
from .secure_flask_session_config import SecureFlaskSessionConfig
from .file_resource_leak import FileResourceLeak

registry = CodemodCollection(
origin="pixee",
Expand Down Expand Up @@ -62,5 +63,6 @@
WithThreadingLock,
SQLQueryParameterization,
SecureFlaskSessionConfig,
FileResourceLeak,
],
)
12 changes: 12 additions & 0 deletions src/core_codemods/docs/pixee_python_fix-file-resource-leak.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
This codemod wraps assignments of `open` calls in a with statement. Without explicit closing, these resources will be "leaked" and won't be re-claimed until garbage collection. In situations where these resources are leaked rapidly (either through malicious repetitive action or unusually spiky usage), connection pool or file handle exhaustion will occur. These types of failures tend to be catastrophic, resulting in downtime and many times affect downstream applications.

Our changes look something like this:

```diff
import tempfile
path = tempfile.NamedTemporaryFile().name
-file = open(path, 'w', encoding='utf-8')
-file.write('Hello World')
+with open(path, 'w', encoding='utf-8') as file:
+ file.write('Hello World')
```
Loading
Loading