diff --git a/integration_tests/test_use_set_literal.py b/integration_tests/test_use_set_literal.py
new file mode 100644
index 00000000..031ed035
--- /dev/null
+++ b/integration_tests/test_use_set_literal.py
@@ -0,0 +1,29 @@
+from core_codemods.use_set_literal import UseSetLiteral
+from integration_tests.base_test import (
+    BaseIntegrationTest,
+    original_and_expected_from_code_path,
+)
+
+
+class TestUseSetLiteral(BaseIntegrationTest):
+    codemod = UseSetLiteral
+    code_path = "tests/samples/set_literal.py"
+
+    original_code, expected_new_code = original_and_expected_from_code_path(
+        code_path,
+        [(0, "x = {1, 2, 3}\n"), (1, "y = set()\n")],
+    )
+
+    expected_diff = """\
+--- 
++++ 
+@@ -1,2 +1,2 @@
+-x = set([1, 2, 3])
+-y = set([])
++x = {1, 2, 3}
++y = set()
+"""
+
+    expected_line_change = "1"
+    num_changes = 2
+    change_description = UseSetLiteral.CHANGE_DESCRIPTION
diff --git a/src/codemodder/scripts/generate_docs.py b/src/codemodder/scripts/generate_docs.py
index ff8f0d11..d86f2153 100644
--- a/src/codemodder/scripts/generate_docs.py
+++ b/src/codemodder/scripts/generate_docs.py
@@ -182,6 +182,10 @@ class DocMetadata:
         importance="High",
         guidance_explained="In most cases setting `shell=False` is correct and leads to much safer code. However there are valid use cases for `shell=True` when using shell functionality like pipes or wildcard is required. In such cases it is important to run only trusted, validated commands.",
     ),
+    "use-set-literal": DocMetadata(
+        importance="Low",
+        guidance_explained="We believe this change is safe and will not cause any issues.",
+    ),
 }
 
 
diff --git a/src/core_codemods/__init__.py b/src/core_codemods/__init__.py
index cceb8428..38a3e84c 100644
--- a/src/core_codemods/__init__.py
+++ b/src/core_codemods/__init__.py
@@ -27,6 +27,7 @@
 from .url_sandbox import UrlSandbox
 from .use_defused_xml import UseDefusedXml
 from .use_generator import UseGenerator
+from .use_set_literal import UseSetLiteral
 from .use_walrus_if import UseWalrusIf
 from .with_threading_lock import WithThreadingLock
 from .secure_flask_session_config import SecureFlaskSessionConfig
@@ -72,6 +73,7 @@
         UrlSandbox,
         UseDefusedXml,
         UseGenerator,
+        UseSetLiteral,
         UseWalrusIf,
         WithThreadingLock,
         SQLQueryParameterization,
diff --git a/src/core_codemods/docs/pixee_python_use-set-literal.md b/src/core_codemods/docs/pixee_python_use-set-literal.md
new file mode 100644
index 00000000..d5e27527
--- /dev/null
+++ b/src/core_codemods/docs/pixee_python_use-set-literal.md
@@ -0,0 +1,7 @@
+This codemod converts Python set constructions using literal list arguments into more efficient and readable set literals. It simplifies expressions like `set([1, 2, 3])` to `{1, 2, 3}`, enhancing both performance and code clarity.
+
+Our changes look like this:
+```diff
+-x = set([1, 2, 3])
++x = {1, 2, 3}
+```
diff --git a/src/core_codemods/use_set_literal.py b/src/core_codemods/use_set_literal.py
new file mode 100644
index 00000000..f4ef023a
--- /dev/null
+++ b/src/core_codemods/use_set_literal.py
@@ -0,0 +1,60 @@
+import libcst as cst
+
+from codemodder.codemods.api import BaseCodemod, ReviewGuidance
+from codemodder.codemods.utils_mixin import NameResolutionMixin
+
+
+class UseSetLiteral(BaseCodemod, NameResolutionMixin):
+    """Rewrite `set([...])` calls on list literals as set literals."""
+
+    NAME = "use-set-literal"
+    SUMMARY = "Use Set Literals Instead of Sets from Lists"
+    REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW
+    DESCRIPTION = "Replace sets from lists with set literals"
+    REFERENCES: list = []
+
+    def leave_Call(self, original_node: cst.Call, updated_node: cst.Call):
+        """Replace a builtin `set` call on a list literal with a set literal.
+
+        `original_node` is used for position and name-resolution lookups;
+        the replacement is built from `updated_node` so that rewrites
+        already applied to nested nodes are kept.
+        """
+        if not self.filter_by_path_includes_or_excludes(
+            self.node_position(original_node)
+        ):
+            return updated_node
+
+        match original_node.func:
+            case cst.Name("set") if self.is_builtin_function(original_node):
+                pass
+            case _:
+                return updated_node
+
+        match updated_node.args:
+            # `set(*[...])` unpacks the list into separate arguments, so only
+            # a single plain positional argument qualifies.
+            case [cst.Arg(value=cst.List() as list_arg, keyword=None, star="")]:
+                pass
+            case _:
+                return updated_node
+
+        self.report_change(original_node)
+
+        # Can't use set literal for empty set
+        if not list_arg.elements:
+            return updated_node.with_changes(args=[])
+
+        # Keep the call's parentheses and the whitespace (and any comments)
+        # just inside the list brackets.
+        return cst.Set(
+            elements=list_arg.elements,
+            lbrace=cst.LeftCurlyBrace(
+                whitespace_after=list_arg.lbracket.whitespace_after
+            ),
+            rbrace=cst.RightCurlyBrace(
+                whitespace_before=list_arg.rbracket.whitespace_before
+            ),
+            lpar=updated_node.lpar,
+            rpar=updated_node.rpar,
+        )
diff --git a/tests/codemods/test_use_set_literal.py b/tests/codemods/test_use_set_literal.py
new file mode 100644
index 00000000..29ee31aa
--- /dev/null
+++ b/tests/codemods/test_use_set_literal.py
@@ -0,0 +1,64 @@
+from core_codemods.use_set_literal import UseSetLiteral
+from tests.codemods.base_codemod_test import BaseCodemodTest
+
+
+class TestUseSetLiteral(BaseCodemodTest):
+    codemod = UseSetLiteral
+
+    def test_simple(self, tmpdir):
+        original_code = """
+        x = set([1, 2, 3])
+        """
+        expected_code = """
+        x = {1, 2, 3}
+        """
+        self.run_and_assert(tmpdir, original_code, expected_code)
+        assert self.file_context and len(self.file_context.codemod_changes) == 1
+
+    def test_empty_list(self, tmpdir):
+        original_code = """
+        x = set([])
+        """
+        expected_code = """
+        x = set()
+        """
+        self.run_and_assert(tmpdir, original_code, expected_code)
+        assert self.file_context and len(self.file_context.codemod_changes) == 1
+
+    def test_already_empty(self, tmpdir):
+        original_code = """
+        x = set()
+        """
+        self.run_and_assert(tmpdir, original_code, original_code)
+        assert self.file_context and len(self.file_context.codemod_changes) == 0
+
+    def test_not_builtin(self, tmpdir):
+        original_code = """
+        from whatever import set
+        x = set([1, 2, 3])
+        """
+        self.run_and_assert(tmpdir, original_code, original_code)
+        assert self.file_context and len(self.file_context.codemod_changes) == 0
+
+    def test_not_list_literal(self, tmpdir):
+        original_code = """
+        x = set(some_previously_defined_list)
+        """
+        self.run_and_assert(tmpdir, original_code, original_code)
+        assert self.file_context and len(self.file_context.codemod_changes) == 0
+
+    def test_nested_set_calls(self, tmpdir):
+        original_code = """
+        x = set([len(set([1, 2])), 3])
+        """
+        expected_code = """
+        x = {len({1, 2}), 3}
+        """
+        self.run_and_assert(tmpdir, original_code, expected_code)
+
+    def test_starred_list_argument(self, tmpdir):
+        original_code = """
+        x = set(*[[1, 2, 3]])
+        """
+        self.run_and_assert(tmpdir, original_code, original_code)
+        assert self.file_context and len(self.file_context.codemod_changes) == 0
diff --git a/tests/samples/set_literal.py b/tests/samples/set_literal.py
new file mode 100644
index 00000000..6592aec8
--- /dev/null
+++ b/tests/samples/set_literal.py
@@ -0,0 +1,2 @@
+x = set([1, 2, 3])
+y = set([])