diff --git a/integration_tests/base_test.py b/integration_tests/base_test.py index 21ed2e0f..b41326d3 100644 --- a/integration_tests/base_test.py +++ b/integration_tests/base_test.py @@ -67,6 +67,10 @@ def setup_method(self): "You must register the codemod to a CodemodCollection." ) from exc + @property + def relative_code_path(self): + return os.path.relpath(self.code_path, SAMPLES_DIR) + def _assert_run_fields(self, run, output_path): assert run["vendor"] == "pixee" assert run["tool"] == "codemodder-python" @@ -74,12 +78,12 @@ def _assert_run_fields(self, run, output_path): assert run["elapsed"] != "" assert ( run["commandLine"] - == f"codemodder {SAMPLES_DIR} --output {output_path} --codemod-include={self.codemod_wrapper.name} --path-include={self.code_path}" + == f"codemodder {SAMPLES_DIR} --output {output_path} --codemod-include={self.codemod_wrapper.name} --path-include={self.relative_code_path}" ) assert run["directory"] == os.path.abspath(SAMPLES_DIR) assert run["sarifs"] == [] - def _assert_results_fields(self, results, output_path): + def _assert_results_fields(self, results): assert len(results) == 1 result = results[0] assert result["codemod"] == self.codemod_wrapper.id @@ -94,9 +98,11 @@ def _assert_results_fields(self, results, output_path): # A codemod may change multiple files. For now we will # assert the resulting data for one file only. change = [ - result for result in result["changeset"] if result["path"] == output_path + result + for result in result["changeset"] + if result["path"] == self.relative_code_path ][0] - assert change["path"] == output_path + assert change["path"] == self.relative_code_path assert change["diff"] == self.expected_diff assert len(change["changes"]) == self.num_changes @@ -114,10 +120,7 @@ def _assert_codetf_output(self): run = codetf["run"] self._assert_run_fields(run, self.output_path) results = codetf["results"] - # CodeTf2 spec requires relative paths - self._assert_results_fields( - results, os.path.relpath(self.code_path, SAMPLES_DIR) - ) + self._assert_results_fields(results) def check_code_before(self): with open(self.code_path, "r", encoding="utf-8") as f: @@ -145,7 +148,7 @@ def test_file_rewritten(self): "--output", self.output_path, f"--codemod-include={self.codemod_wrapper.name}", - f"--path-include={self.code_path}", + f"--path-include={self.relative_code_path}", ] self.check_code_before() diff --git a/src/codemodder/code_directory.py b/src/codemodder/code_directory.py index 7e132340..d0a06cc5 100644 --- a/src/codemodder/code_directory.py +++ b/src/codemodder/code_directory.py @@ -36,13 +36,26 @@ def file_line_patterns(file_path: str | Path, patterns: Sequence[str]): ] -def filter_files(names: Sequence[str], patterns: Sequence[str], exclude: bool = False): +def filter_files( + names: Sequence[str], + parent_path: str, + patterns: Sequence[str], + exclude: bool = False, +): patterns = ( [x.split(":")[0] for x in (patterns or [])] if not exclude # An excluded line should not cause the entire file to be excluded else [x for x in (patterns or []) if ":" not in x] ) + + # TODO: handle case when parent path is "." + patterns = [ + str(Path(parent_path) / Path(pat)) + if not pat.startswith("*") + else parent_path + pat + for pat in patterns + ] return itertools.chain(*[fnmatch.filter(names, pattern) for pattern in patterns]) @@ -65,15 +78,18 @@ def match_files( that match the criteria of both exclude and include patterns. """ all_files = [str(path) for path in Path(parent_path).rglob("*")] + included_files = set( filter_files( all_files, + str(parent_path), include_paths if include_paths is not None else DEFAULT_INCLUDED_PATHS, ) ) excluded_files = set( filter_files( all_files, + str(parent_path), exclude_paths if exclude_paths is not None else DEFAULT_EXCLUDED_PATHS, exclude=True, ) diff --git a/src/codemodder/codemodder.py b/src/codemodder/codemodder.py index 69603d3a..5eed3680 100644 --- a/src/codemodder/codemodder.py +++ b/src/codemodder/codemodder.py @@ -3,7 +3,6 @@ import logging import os import sys -from pathlib import Path import libcst as cst from libcst.codemod import CodemodContext @@ -129,13 +128,7 @@ def run(original_args) -> int: log_section("startup") logger.info("codemodder: python/%s", __VERSION__) - - context = CodemodExecutionContext( - Path(argv.directory), - argv.dry_run, - argv.verbose, - codemod_registry, - ) + context = CodemodExecutionContext(argv, codemod_registry) # TODO: this should be a method of CodemodExecutionContext codemods_to_run = codemod_registry.match_codemods( diff --git a/src/codemodder/context.py b/src/codemodder/context.py index 9b88e8a7..06524b1d 100644 --- a/src/codemodder/context.py +++ b/src/codemodder/context.py @@ -1,3 +1,4 @@ +import argparse import logging from pathlib import Path import itertools @@ -21,14 +22,14 @@ class CodemodExecutionContext: # pylint: disable=too-many-instance-attributes def __init__( self, - directory: Path, - dry_run: bool, - verbose: bool, + cli_args: argparse.Namespace, registry: CodemodRegistry, ): - self.directory = directory - self.dry_run = dry_run - self.verbose = verbose + self.directory = Path(cli_args.directory) + self.dry_run = cli_args.dry_run + self.verbose = cli_args.verbose + self.path_include: list = cli_args.path_include + self.path_exclude: list = cli_args.path_exclude self._results_by_codemod = {} self._failures_by_codemod = {} self.dependencies = {} diff --git a/src/codemodder/semgrep.py b/src/codemodder/semgrep.py index 59493d04..78f6297e 100644 --- a/src/codemodder/semgrep.py +++ b/src/codemodder/semgrep.py @@ -26,6 +26,24 @@ def run(execution_context: CodemodExecutionContext, yaml_files: List[Path]) -> d "-o", temp_sarif_file.name, ] + + if execution_context.path_exclude: + command.extend( + itertools.chain.from_iterable( + map( + lambda f: ["--exclude", f"{execution_context.directory}{f}"], + execution_context.path_exclude, + ) + ) + ) + if execution_context.path_include: + # Note: parent path is not passed with --include + command.extend( + itertools.chain.from_iterable( + map(lambda f: ["--include", str(f)], execution_context.path_include) + ) + ) + command.extend( itertools.chain.from_iterable( map(lambda f: ["--config", str(f)], yaml_files) diff --git a/tests/codemods/base_codemod_test.py b/tests/codemods/base_codemod_test.py index c35cd4cc..ca090c99 100644 --- a/tests/codemods/base_codemod_test.py +++ b/tests/codemods/base_codemod_test.py @@ -1,15 +1,16 @@ # pylint: disable=no-member,not-callable,attribute-defined-outside-init +import argparse from collections import defaultdict import os from pathlib import Path from textwrap import dedent from typing import ClassVar - import libcst as cst from libcst.codemod import CodemodContext import mock from codemodder.context import CodemodExecutionContext +from codemodder.code_directory import DEFAULT_INCLUDED_PATHS, DEFAULT_EXCLUDED_PATHS from codemodder.file_context import FileContext from codemodder.registry import CodemodRegistry, CodemodCollection from codemodder.semgrep import run as semgrep_run @@ -21,18 +22,23 @@ class BaseCodemodTest: def setup_method(self): self.file_context = None + def _make_context(self, root): + cli_args = argparse.Namespace( + directory=root, + dry_run=True, + verbose=False, + path_include=DEFAULT_INCLUDED_PATHS, + path_exclude=DEFAULT_EXCLUDED_PATHS, + ) + return CodemodExecutionContext(cli_args, registry=mock.MagicMock()) + def run_and_assert(self, tmpdir, input_code, expected): tmp_file_path = tmpdir / "code.py" self.run_and_assert_filepath(tmpdir, tmp_file_path, input_code, expected) def run_and_assert_filepath(self, root, file_path, input_code, expected): input_tree = cst.parse_module(dedent(input_code)) - self.execution_context = CodemodExecutionContext( - directory=root, - dry_run=True, - verbose=False, - registry=mock.MagicMock(), - ) + self.execution_context = self._make_context(root) self.file_context = FileContext( file_path, [], @@ -74,12 +80,7 @@ def results_by_id_filepath(self, input_code, file_path): return semgrep_run(self.execution_context, results[0].yaml_files) def run_and_assert_filepath(self, root, file_path, input_code, expected): - self.execution_context = CodemodExecutionContext( - directory=root, - dry_run=True, - verbose=False, - registry=mock.MagicMock(), - ) + self.execution_context = self._make_context(root) input_tree = cst.parse_module(input_code) all_results = self.results_by_id_filepath(input_code, file_path) results = all_results[str(file_path)] diff --git a/tests/test_code_directory.py b/tests/test_code_directory.py index 17c34b2d..debc55b1 100644 --- a/tests/test_code_directory.py +++ b/tests/test_code_directory.py @@ -23,7 +23,11 @@ def dir_structure(tmp_path_factory): (tests_dir / "test_make_request.py").touch() (tests_dir / "test_insecure_random.py").touch() - assert len(list(base_dir.rglob("*"))) == 9 + sub_tests_dir = tests_dir / "tests" + sub_tests_dir.mkdir() + (sub_tests_dir / "something.py").touch() + + assert len(list(base_dir.rglob("*"))) == 11 return base_dir @@ -35,11 +39,18 @@ def _assert_expected(self, result_files, expected_files): file_names.sort() assert file_names == expected_files - def test_all_py_files_match(self, dir_structure): + def test_all_py_files_match_except_tests_dir(self, dir_structure): expected = ["empty_for_testing.py", "insecure_random.py", "make_request.py"] files = match_files(dir_structure) self._assert_expected(files, expected) + def test_tests_not_excluded(self, dir_structure): + expected = ["test_insecure_random.py", "test_make_request.py"] + # anything in foo/tests will be analyzed but anything in + # foo/tests/tests will not be analyzed by default + files = match_files(dir_structure / "tests") + self._assert_expected(files, expected) + def test_match_excluded(self, dir_structure): expected = ["empty_for_testing.py", "insecure_random.py"] files = match_files(dir_structure, ["**/tests/**", "*request.py"]) @@ -102,40 +113,36 @@ def test_match_excluded_precedence_over_included(self, dir_structure): self._assert_expected(files, expected) def test_test_directory_not_excluded(self, dir_structure): - expected = ["test_insecure_random.py", "test_make_request.py"] + expected = ["something.py", "test_insecure_random.py", "test_make_request.py"] files = match_files( dir_structure, exclude_paths=["**/samples/**", "**/more_samples/**"] ) self._assert_expected(files, expected) - def test_include_test_overridden_by_default_excludes(self, mocker): - mocker.patch( - "codemodder.code_directory.Path.rglob", - return_value=[ - "foo/tests/test_insecure_random.py", - "foo/tests/test_make_request.py", - ], - ) - mocker.patch( - "codemodder.code_directory.Path.is_file", - return_value=True, - ) - files = match_files(Path("."), include_paths=["**/tests/**"]) + def test_include_test_overridden_by_default_excludes(self, dir_structure): + files = match_files(dir_structure, include_paths=["**/tests/**"]) self._assert_expected(files, []) - def test_include_test_without_default_includes(self, mocker): - files = ["foo/tests/test_insecure_random.py", "foo/tests/test_make_request.py"] - mocker.patch( - "codemodder.code_directory.Path.rglob", - return_value=files, - ) - mocker.patch( - "codemodder.code_directory.Path.is_file", - return_value=True, - ) - result = match_files(Path("."), exclude_paths=[]) - assert result == [Path(x) for x in files] + def test_include_test_without_default_excludes(self, dir_structure): + expected = [ + "empty_for_testing.py", + "insecure_random.py", + "make_request.py", + "something.py", + "test_insecure_random.py", + "test_make_request.py", + ] + files = match_files(dir_structure, exclude_paths=[]) + self._assert_expected(files, expected) def test_extract_line_from_pattern(self): lines = file_line_patterns(Path("insecure_random.py"), ["insecure_*.py:3"]) assert lines == [3] + + def test_include_specific_file(self, dir_structure): + expected = ["empty_for_testing.py"] + files = match_files( + dir_structure / "samples" / "more_samples", + include_paths=["empty_for_testing.py"], + ) + self._assert_expected(files, expected)