pixee · clavedeluna · Oct 17, 2023 · Oct 17, 2023 · Oct 17, 2023 · Oct 19, 2023
@@ -67,19 +67,23 @@ def setup_method(self):
                 "You must register the codemod to a CodemodCollection."
             ) from exc
 
+    @property
+    def relative_code_path(self):
+        return os.path.relpath(self.code_path, SAMPLES_DIR)
+
     def _assert_run_fields(self, run, output_path):
         assert run["vendor"] == "pixee"
         assert run["tool"] == "codemodder-python"
         assert run["version"] == __VERSION__
         assert run["elapsed"] != ""
         assert (
             run["commandLine"]
-            == f"codemodder {SAMPLES_DIR} --output {output_path} --codemod-include={self.codemod_wrapper.name} --path-include={self.code_path}"
+            == f"codemodder {SAMPLES_DIR} --output {output_path} --codemod-include={self.codemod_wrapper.name} --path-include={self.relative_code_path}"
         )
         assert run["directory"] == os.path.abspath(SAMPLES_DIR)
         assert run["sarifs"] == []
 
-    def _assert_results_fields(self, results, output_path):
+    def _assert_results_fields(self, results):
         assert len(results) == 1
         result = results[0]
         assert result["codemod"] == self.codemod_wrapper.id
@@ -94,9 +98,11 @@ def _assert_results_fields(self, results, output_path):
         # A codemod may change multiple files. For now we will
         # assert the resulting data for one file only.
         change = [
-            result for result in result["changeset"] if result["path"] == output_path
+            result
+            for result in result["changeset"]
+            if result["path"] == self.relative_code_path
         ][0]
-        assert change["path"] == output_path
+        assert change["path"] == self.relative_code_path
         assert change["diff"] == self.expected_diff
 
         assert len(change["changes"]) == self.num_changes
@@ -114,10 +120,7 @@ def _assert_codetf_output(self):
         run = codetf["run"]
         self._assert_run_fields(run, self.output_path)
         results = codetf["results"]
-        # CodeTf2 spec requires relative paths
-        self._assert_results_fields(
-            results, os.path.relpath(self.code_path, SAMPLES_DIR)
-        )
+        self._assert_results_fields(results)
 
     def check_code_before(self):
         with open(self.code_path, "r", encoding="utf-8") as f:
@@ -145,7 +148,7 @@ def test_file_rewritten(self):
             "--output",
             self.output_path,
             f"--codemod-include={self.codemod_wrapper.name}",
-            f"--path-include={self.code_path}",
+            f"--path-include={self.relative_code_path}",
         ]
 
         self.check_code_before()

@@ -36,13 +36,26 @@ def file_line_patterns(file_path: str | Path, patterns: Sequence[str]):
     ]
 
 
-def filter_files(names: Sequence[str], patterns: Sequence[str], exclude: bool = False):
+def filter_files(
+    names: Sequence[str],
+    parent_path: str,
+    patterns: Sequence[str],
+    exclude: bool = False,
+):
     patterns = (
         [x.split(":")[0] for x in (patterns or [])]
         if not exclude
         # An excluded line should not cause the entire file to be excluded
         else [x for x in (patterns or []) if ":" not in x]
     )
+
+    # TODO: handle case when parent path is "."
+    patterns = [
+        str(Path(parent_path) / Path(pat))
+        if not pat.startswith("*")
+        else parent_path + pat
+        for pat in patterns
+    ]
     return itertools.chain(*[fnmatch.filter(names, pattern) for pattern in patterns])
 
 
@@ -65,15 +78,18 @@ def match_files(
     that match the criteria of both exclude and include patterns.
     """
     all_files = [str(path) for path in Path(parent_path).rglob("*")]
+
     included_files = set(
         filter_files(
             all_files,
+            str(parent_path),
             include_paths if include_paths is not None else DEFAULT_INCLUDED_PATHS,
         )
     )
     excluded_files = set(
         filter_files(
             all_files,
+            str(parent_path),
             exclude_paths if exclude_paths is not None else DEFAULT_EXCLUDED_PATHS,
             exclude=True,
         )

@@ -3,7 +3,6 @@
 import logging
 import os
 import sys
-from pathlib import Path
 
 import libcst as cst
 from libcst.codemod import CodemodContext
@@ -129,13 +128,7 @@ def run(original_args) -> int:
 
     log_section("startup")
     logger.info("codemodder: python/%s", __VERSION__)
-
-    context = CodemodExecutionContext(
-        Path(argv.directory),
-        argv.dry_run,
-        argv.verbose,
-        codemod_registry,
-    )
+    context = CodemodExecutionContext(argv, codemod_registry)
 
     # TODO: this should be a method of CodemodExecutionContext
     codemods_to_run = codemod_registry.match_codemods(

@@ -1,3 +1,4 @@
+import argparse
 import logging
 from pathlib import Path
 import itertools
@@ -21,14 +22,14 @@ class CodemodExecutionContext:  # pylint: disable=too-many-instance-attributes
 
     def __init__(
         self,
-        directory: Path,
-        dry_run: bool,
-        verbose: bool,
+        cli_args: argparse.Namespace,
         registry: CodemodRegistry,
     ):
-        self.directory = directory
-        self.dry_run = dry_run
-        self.verbose = verbose
+        self.directory = Path(cli_args.directory)
+        self.dry_run = cli_args.dry_run
+        self.verbose = cli_args.verbose
+        self.path_include: list = cli_args.path_include
+        self.path_exclude: list = cli_args.path_exclude
         self._results_by_codemod = {}
         self._failures_by_codemod = {}
         self.dependencies = {}

@@ -26,6 +26,24 @@ def run(execution_context: CodemodExecutionContext, yaml_files: List[Path]) -> d
             "-o",
             temp_sarif_file.name,
         ]
+
+        if execution_context.path_exclude:
+            command.extend(
+                itertools.chain.from_iterable(
+                    map(
+                        lambda f: ["--exclude", f"{execution_context.directory}{f}"],
+                        execution_context.path_exclude,
+                    )
+                )
+            )
+        if execution_context.path_include:
+            # Note: parent path is not passed with --include
+            command.extend(
+                itertools.chain.from_iterable(
+                    map(lambda f: ["--include", str(f)], execution_context.path_include)
+                )
+            )
+
         command.extend(
             itertools.chain.from_iterable(
                 map(lambda f: ["--config", str(f)], yaml_files)

@@ -1,15 +1,16 @@
 # pylint: disable=no-member,not-callable,attribute-defined-outside-init
+import argparse
 from collections import defaultdict
 import os
 from pathlib import Path
 from textwrap import dedent
 from typing import ClassVar
-
 import libcst as cst
 from libcst.codemod import CodemodContext
 import mock
 
 from codemodder.context import CodemodExecutionContext
+from codemodder.code_directory import DEFAULT_INCLUDED_PATHS, DEFAULT_EXCLUDED_PATHS
 from codemodder.file_context import FileContext
 from codemodder.registry import CodemodRegistry, CodemodCollection
 from codemodder.semgrep import run as semgrep_run
@@ -21,18 +22,23 @@ class BaseCodemodTest:
     def setup_method(self):
         self.file_context = None
 
+    def _make_context(self, root):
+        cli_args = argparse.Namespace(
+            directory=root,
+            dry_run=True,
+            verbose=False,
+            path_include=DEFAULT_INCLUDED_PATHS,
+            path_exclude=DEFAULT_EXCLUDED_PATHS,
+        )
+        return CodemodExecutionContext(cli_args, registry=mock.MagicMock())
+
     def run_and_assert(self, tmpdir, input_code, expected):
         tmp_file_path = tmpdir / "code.py"
         self.run_and_assert_filepath(tmpdir, tmp_file_path, input_code, expected)
 
     def run_and_assert_filepath(self, root, file_path, input_code, expected):
         input_tree = cst.parse_module(dedent(input_code))
-        self.execution_context = CodemodExecutionContext(
-            directory=root,
-            dry_run=True,
-            verbose=False,
-            registry=mock.MagicMock(),
-        )
+        self.execution_context = self._make_context(root)
         self.file_context = FileContext(
             file_path,
             [],
@@ -74,12 +80,7 @@ def results_by_id_filepath(self, input_code, file_path):
         return semgrep_run(self.execution_context, results[0].yaml_files)
 
     def run_and_assert_filepath(self, root, file_path, input_code, expected):
-        self.execution_context = CodemodExecutionContext(
-            directory=root,
-            dry_run=True,
-            verbose=False,
-            registry=mock.MagicMock(),
-        )
+        self.execution_context = self._make_context(root)
         input_tree = cst.parse_module(input_code)
         all_results = self.results_by_id_filepath(input_code, file_path)
         results = all_results[str(file_path)]

@@ -23,7 +23,11 @@ def dir_structure(tmp_path_factory):
     (tests_dir / "test_make_request.py").touch()
     (tests_dir / "test_insecure_random.py").touch()
 
-    assert len(list(base_dir.rglob("*"))) == 9
+    sub_tests_dir = tests_dir / "tests"
+    sub_tests_dir.mkdir()
+    (sub_tests_dir / "something.py").touch()
+
+    assert len(list(base_dir.rglob("*"))) == 11
 
     return base_dir
 
@@ -35,11 +39,18 @@ def _assert_expected(self, result_files, expected_files):
         file_names.sort()
         assert file_names == expected_files
 
-    def test_all_py_files_match(self, dir_structure):
+    def test_all_py_files_match_except_tests_dir(self, dir_structure):
         expected = ["empty_for_testing.py", "insecure_random.py", "make_request.py"]
         files = match_files(dir_structure)
         self._assert_expected(files, expected)
 
+    def test_tests_not_excluded(self, dir_structure):
+        expected = ["test_insecure_random.py", "test_make_request.py"]
+        # anything in foo/tests will be analyzed but anything in
+        # foo/tests/tests will not be analyzed by default
+        files = match_files(dir_structure / "tests")
+        self._assert_expected(files, expected)
+
     def test_match_excluded(self, dir_structure):
         expected = ["empty_for_testing.py", "insecure_random.py"]
         files = match_files(dir_structure, ["**/tests/**", "*request.py"])
@@ -102,40 +113,36 @@ def test_match_excluded_precedence_over_included(self, dir_structure):
         self._assert_expected(files, expected)
 
     def test_test_directory_not_excluded(self, dir_structure):
-        expected = ["test_insecure_random.py", "test_make_request.py"]
+        expected = ["something.py", "test_insecure_random.py", "test_make_request.py"]
         files = match_files(
             dir_structure, exclude_paths=["**/samples/**", "**/more_samples/**"]
         )
         self._assert_expected(files, expected)
 
-    def test_include_test_overridden_by_default_excludes(self, mocker):
-        mocker.patch(
-            "codemodder.code_directory.Path.rglob",
-            return_value=[
-                "foo/tests/test_insecure_random.py",
-                "foo/tests/test_make_request.py",
-            ],
-        )
-        mocker.patch(
-            "codemodder.code_directory.Path.is_file",
-            return_value=True,
-        )
-        files = match_files(Path("."), include_paths=["**/tests/**"])
+    def test_include_test_overridden_by_default_excludes(self, dir_structure):
+        files = match_files(dir_structure, include_paths=["**/tests/**"])
         self._assert_expected(files, [])
 
-    def test_include_test_without_default_includes(self, mocker):
-        files = ["foo/tests/test_insecure_random.py", "foo/tests/test_make_request.py"]
-        mocker.patch(
-            "codemodder.code_directory.Path.rglob",
-            return_value=files,
-        )
-        mocker.patch(
-            "codemodder.code_directory.Path.is_file",
-            return_value=True,
-        )
-        result = match_files(Path("."), exclude_paths=[])
-        assert result == [Path(x) for x in files]
+    def test_include_test_without_default_excludes(self, dir_structure):
+        expected = [
+            "empty_for_testing.py",
+            "insecure_random.py",
+            "make_request.py",
+            "something.py",
+            "test_insecure_random.py",
+            "test_make_request.py",
+        ]
+        files = match_files(dir_structure, exclude_paths=[])
+        self._assert_expected(files, expected)
 
     def test_extract_line_from_pattern(self):
         lines = file_line_patterns(Path("insecure_random.py"), ["insecure_*.py:3"])
         assert lines == [3]
+
+    def test_include_specific_file(self, dir_structure):
+        expected = ["empty_for_testing.py"]
+        files = match_files(
+            dir_structure / "samples" / "more_samples",
+            include_paths=["empty_for_testing.py"],
+        )
+        self._assert_expected(files, expected)