diff --git a/src/codemodder/codemods/utils.py b/src/codemodder/codemods/utils.py index f547cc09..2e9d5e1d 100644 --- a/src/codemodder/codemods/utils.py +++ b/src/codemodder/codemods/utils.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Union +from typing import Optional, Union from libcst import matchers import libcst as cst @@ -44,3 +44,12 @@ def get_call_name(call: cst.Call) -> str: return call.func.attr.value # It's a simple Name return call.func.value + + +def get_function_name_node(call: cst.Call) -> Optional[cst.Name]: + match call.func: + case cst.Name(): + return call.func + case cst.Attribute(): + return call.func.attr + return None diff --git a/src/core_codemods/docs/pixee_python_sql-parameterization.md b/src/core_codemods/docs/pixee_python_sql-parameterization.md index 846d0147..b3c8603f 100644 --- a/src/core_codemods/docs/pixee_python_sql-parameterization.md +++ b/src/core_codemods/docs/pixee_python_sql-parameterization.md @@ -13,11 +13,3 @@ cursor = connection.cursor() - cursor.execute("SELECT * from USERS WHERE name ='" + name + "'") + cursor.execute("SELECT * from USERS WHERE name =?", (name, )) ``` - -If you have feedback on this codemod, [please let us know](mailto:feedback@pixee.ai)! - -## F.A.Q. - -### Why is this codemod marked as Merge With Cursory Review - -Python has a wealth of database drivers that all use the same interface. Different drivers may require different string tokens used for parameterization, and Python's dynamic typing makes it quite hard, and sometimes impossible, to detect which driver is being used just by looking at the code. diff --git a/src/core_codemods/sql_parameterization.py b/src/core_codemods/sql_parameterization.py index 5c25028b..c4bd7f40 100644 --- a/src/core_codemods/sql_parameterization.py +++ b/src/core_codemods/sql_parameterization.py @@ -20,6 +20,7 @@ from codemodder.codemods.transformations.remove_empty_string_concatenation import ( RemoveEmptyStringConcatenation, ) +from codemodder.codemods.utils import get_function_name_node from codemodder.codemods.utils_mixin import NameResolutionMixin parameter_token = "?" @@ -29,20 +30,22 @@ literal = literal_number | literal_string -class Append: +class SequenceExtension: def __init__(self, sequence: list[cst.CSTNode]) -> None: self.sequence = sequence -class Prepend: - def __init__(self, sequence: list[cst.CSTNode]) -> None: - self.sequence = sequence +class Append(SequenceExtension): + pass + + +class Prepend(SequenceExtension): + pass class ReplaceNodes(cst.CSTTransformer): """ - Replace nodes with their corresponding values in a given dict. - You can replace the entire node or some attributes of it via a dict(). Addionally if the attribute is a sequence, you may pass Append(l)/Prepend(l), where l is a list of nodes, to append or prepend, respectivelly. + Replace nodes with their corresponding values in a given dict. The replacements dictionary should either contain a mapping from a node to another node to be replaced, or a dict mapping each attribute, by name, to a new value. Additionally if the attribute is a sequence, you may pass Append(l)/Prepend(l), where l is a list of nodes, to append or prepend, respectively. """ def __init__( @@ -80,8 +83,9 @@ def on_leave(self, original_node, updated_node): class SQLQueryParameterization(BaseCodemod, Codemod): + SUMMARY = "Parameterize SQL queries." METADATA = CodemodMetadata( - DESCRIPTION=("Parameterize SQL queries."), + DESCRIPTION=SUMMARY, NAME="sql-parameterization", REVIEW_GUIDANCE=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW, REFERENCES=[ @@ -95,19 +99,13 @@ class SQLQueryParameterization(BaseCodemod, Codemod): }, ], ) - SUMMARY = "Parameterize SQL queries." - CHANGE_DESCRIPTION = "" + CHANGE_DESCRIPTION = "Parameterized SQL query execution." METADATA_DEPENDENCIES = ( PositionProvider, ScopeProvider, ParentNodeProvider, ) - METADATA_DEPENDENCIES = ( - ScopeProvider, - ParentNodeProvider, - PositionProvider, - ) def __init__(self, context: CodemodContext, *codemod_args) -> None: self.changed_nodes: dict[ @@ -175,9 +173,6 @@ def transform_module_impl(self, tree: cst.Module) -> cst.Module: return result - # Replace node entirelly -> CSTNode - # update some attribute -> dict[str,any] - # apend or prepend an argument -> Append([]) / Preprend([]) def _fix_injection( self, start: cst.CSTNode, middle: list[cst.CSTNode], end: cst.CSTNode ): @@ -352,7 +347,7 @@ def leave_Module(self, original_node: cst.Module): end = leaves.pop() if any(map(self._is_injectable, middle)): self.injection_patterns.append((start, middle, end)) - # end may contain the start of anothe literal, put it back + # end may contain the start of another literal, put it back # should not be a single quote # TODO think of a better solution here @@ -458,7 +453,7 @@ def _has_keyword(self, string: str) -> bool: return False def leave_Call(self, original_node: cst.Call) -> None: - maybe_call_name = _get_function_name_node(original_node) + maybe_call_name = get_function_name_node(original_node) if maybe_call_name and maybe_call_name.value == "execute": # TODO don't parameterize if there are parameters already # may be temporary until I figure out if named parameter will work on most drivers @@ -473,12 +468,3 @@ def leave_Call(self, original_node: cst.Call) -> None: expr.value ): self.calls[original_node] = query_visitor.leaves - - -def _get_function_name_node(call: cst.Call) -> Optional[cst.Name]: - match call.func: - case cst.Name(): - return call.func - case cst.Attribute(): - return call.func.attr - return None diff --git a/src/scripts/generate_docs.py b/src/scripts/generate_docs.py index 7861c946..3d4d4777 100644 --- a/src/scripts/generate_docs.py +++ b/src/scripts/generate_docs.py @@ -118,6 +118,10 @@ class DocMetadata: importance="Low", guidance_explained="We believe this replacement is safe and should not result in any issues.", ), + "sql-parameterization": DocMetadata( + importance="High", + guidance_explained="Python has a wealth of database drivers that all use the same `dbapi2` interface detailed in [PEP249](https://peps.python.org/pep-0249/). Different drivers may require different string tokens used for parameterization, and Python's dynamic typing makes it quite hard, and sometimes impossible, to detect which driver is being used just by looking at the code.", + ), } diff --git a/tests/codemods/test_sql_parameterization.py b/tests/codemods/test_sql_parameterization.py index a972c676..425b16c7 100644 --- a/tests/codemods/test_sql_parameterization.py +++ b/tests/codemods/test_sql_parameterization.py @@ -235,6 +235,7 @@ def foo(self, cursor, name, phone): assert len(self.file_context.codemod_changes) == 0 def test_wont_change_class_attribute(self, tmpdir): + # query may be accesed from outside the module by importing A input_code = """\ import sqlite3 @@ -250,6 +251,7 @@ def foo(self, name, cursor): assert len(self.file_context.codemod_changes) == 0 def test_wont_change_module_variable(self, tmpdir): + # query may be accesed from outside the module by importing it input_code = """\ import sqlite3 diff --git a/tests/transformations/test_remove_empty_string_concatenation.py b/tests/transformations/test_remove_empty_string_concatenation.py index cfc6d631..22de7a5b 100644 --- a/tests/transformations/test_remove_empty_string_concatenation.py +++ b/tests/transformations/test_remove_empty_string_concatenation.py @@ -47,6 +47,19 @@ def test_both(self): self.assertCodemod(before, after) + def test_multiple(self): + before = """ + "" + "whatever" + "" + "hello" + "" + "world" + """ + + after = """ + "whatever" + "hello" + "world" + """ + + self.assertCodemod(before, after) + def test_concatenated_string_right(self): before = """ "hello" "" @@ -69,6 +82,19 @@ def test_concatenated_string_left(self): self.assertCodemod(before, after) + def test_concatenated_string_multiple(self): + before = """ + "" "whatever" "" + "hello" "" "world" + """ + + after = """ + "whatever" + "hello" "world" + """ + + self.assertCodemod(before, after) + def test_multiple_mixed(self): before = ( """