Skip to content

Commit

Permalink
Ignore tests and other files by default
Browse files Browse the repository at this point in the history
  • Loading branch information
drdavella committed Oct 17, 2023
1 parent 21ae8b2 commit 722f9c7
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 62 deletions.
7 changes: 4 additions & 3 deletions src/codemodder/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys

from codemodder import __VERSION__
from codemodder.code_directory import DEFAULT_INCLUDED_PATHS, DEFAULT_EXCLUDED_PATHS
from codemodder.logging import OutputFormat, logger


Expand Down Expand Up @@ -43,7 +44,7 @@ class CsvListAction(argparse.Action):
argparse Action to convert "a,b,c" into ["a", "b", "c"]
"""

def __call__(self, parser, namespace, values: str, option_string=None):
def __call__(self, parser, namespace, values, option_string=None):
# Conversion to dict removes duplicates while preserving order
items = list(dict.fromkeys(values.split(",")).keys())
self.validate_items(items)
Expand Down Expand Up @@ -144,13 +145,13 @@ def parse_args(argv, codemod_registry):
parser.add_argument(
"--path-exclude",
action=CsvListAction,
default="",
default=DEFAULT_EXCLUDED_PATHS,
help="Comma-separated set of UNIX glob patterns to exclude",
)
parser.add_argument(
"--path-include",
action=CsvListAction,
default="**/*.py",
default=DEFAULT_INCLUDED_PATHS,
help="Comma-separated set of UNIX glob patterns to include",
)

Expand Down
109 changes: 63 additions & 46 deletions src/codemodder/code_directory.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,82 @@
from pathlib import Path


DEFAULT_INCLUDE = "*.py"


def py_file_matches(file_path, patterns):
"""
file_path is suffixed with `.py` and matches at least one pattern.
"""
is_py_file = file_path.match(DEFAULT_INCLUDE)
if not patterns:
return is_py_file
return is_py_file and any(
file_path.match(pattern.rsplit(":")[0]) for pattern in patterns
)
from typing import Optional, Sequence

import fnmatch
import itertools
from pathlib import Path

def file_line_patterns(file_path, patterns):
DEFAULT_INCLUDED_PATHS = ["**.py", "**/*.py"]
DEFAULT_EXCLUDED_PATHS = [
"**/test/**",
"**/tests/**",
"**/test_*.py",
"**/tests_*.py",
"**/conftest.py",
"**/build/**",
"**/dist/**",
"**/venv/**",
"**/.venv/**",
"**/.tox/**",
"**/.nox/**",
"**/.eggs/**",
"**/.git/**",
"**/.mypy_cache/**",
"**/.pytest_cache/**",
"**/.hypothesis/**",
"**/.coverage*",
]


def file_line_patterns(file_path: str | Path, patterns: Sequence[str]):
"""
Find the lines included or excluded for a given file_path among the patterns
"""
lines = []
for pattern in patterns or []:
split = pattern.rsplit(":")
if len(split) == 2:
if file_path.match(split[0]):
lines.append(int(split[1]))
return lines
return [
int(split[1])
for pat in patterns
if (split := pat.split(":"))
and len(split) == 2
and fnmatch.fnmatch(str(file_path), split[0])
]


def filter_files(names: Sequence[str], patterns: Sequence[str], exclude: bool = False):
patterns = (
[x.split(":")[0] for x in (patterns or [])]
if not exclude
# An excluded line should not cause the entire file to be excluded
else [x for x in (patterns or []) if ":" not in x]
)
return itertools.chain(*[fnmatch.filter(names, pattern) for pattern in patterns])


def match_files(parent_path, exclude_paths=None, include_paths=None):
def match_files(
parent_path: str | Path,
exclude_paths: Optional[Sequence[str]] = None,
include_paths: Optional[Sequence[str]] = None,
):
"""
Find pattern-matching files starting at the parent_path, recursively.
If a file matches any exclude pattern, it is not matched. If any include
patterns are passed in, a file must match `*.py` and at least one include patterns.
:param parent_path: str name for starting directory
:param exclude_paths: comma-separated set of UNIX glob patterns to exclude
:param include_paths: comma-separated set of UNIX glob patterns to exclude
:param exclude_paths: list of UNIX glob patterns to exclude
:param include_paths: list of UNIX glob patterns to exclude
:return: list of <pathlib.PosixPath> files found within (including recursively) the parent directory
that match the criteria of both exclude and include patterns.
"""
exclude_paths = list(exclude_paths) if exclude_paths is not None else []
include_paths = list(include_paths) if include_paths is not None else []

matched_files = []

for file_path in Path(parent_path).rglob("*"):
if file_path.is_file():
# Exclude patterns take precedence over include patterns.
if exclude_file(file_path, exclude_paths):
continue

if py_file_matches(file_path, include_paths):
matched_files.append(file_path)

return matched_files

all_files = [str(path) for path in Path(parent_path).rglob("*")]
included_files = set(
filter_files(all_files, include_paths or DEFAULT_INCLUDED_PATHS)
)
excluded_files = set(
filter_files(all_files, exclude_paths or DEFAULT_EXCLUDED_PATHS, exclude=True)
)

def exclude_file(file_path, exclude_patterns):
# if a file matches any excluded pattern, return True so it is not included for analysis
return any(file_path.match(exclude) for exclude in exclude_patterns)
return [
path
for p in sorted(list(included_files - excluded_files))
if (path := Path(p)).is_file() and path.suffix == ".py"
]
42 changes: 29 additions & 13 deletions tests/test_code_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

@pytest.fixture(scope="module")
def dir_structure(tmp_path_factory):
tests_dir = tmp_path_factory.mktemp("tests")
samples_dir = tests_dir / "samples"
base_dir = tmp_path_factory.mktemp("foo")
samples_dir = base_dir / "samples"
samples_dir.mkdir()
(samples_dir / "make_request.py").touch()
(samples_dir / "insecure_random.py").touch()
Expand All @@ -18,9 +18,14 @@ def dir_structure(tmp_path_factory):
(more_samples_dir / "empty_for_testing.py").touch()
(more_samples_dir / "empty_for_testing.txt").touch()

assert len(list(tests_dir.rglob("*"))) == 6
tests_dir = base_dir / "tests"
tests_dir.mkdir()
(tests_dir / "test_make_request.py").touch()
(tests_dir / "test_insecure_random.py").touch()

return tests_dir
assert len(list(base_dir.rglob("*"))) == 9

return base_dir


class TestMatchFiles:
Expand All @@ -37,45 +42,49 @@ def test_all_py_files_match(self, dir_structure):

def test_match_excluded(self, dir_structure):
expected = ["empty_for_testing.py", "insecure_random.py"]
files = match_files(dir_structure, ["*request.py"])
files = match_files(dir_structure, ["**/tests/**", "*request.py"])
self._assert_expected(files, expected)

def test_match_included_file_with_line(self, dir_structure):
expected = ["insecure_random.py"]
files = match_files(dir_structure, include_paths=["insecure_random.py:2"])
files = match_files(dir_structure, include_paths=["**/insecure_random.py:2"])
self._assert_expected(files, expected)

def test_match_excluded_line(self, dir_structure):
expected = ["empty_for_testing.py", "insecure_random.py", "make_request.py"]
files = match_files(dir_structure, exclude_paths=["insecure_random.py:2"])
files = match_files(
dir_structure, exclude_paths=["**/tests/**", "**/insecure_random.py:2"]
)
self._assert_expected(files, expected)

def test_match_included_line_and_glob(self, dir_structure):
expected = ["insecure_random.py"]
files = match_files(dir_structure, include_paths=["insecure*.py:3"])
files = match_files(dir_structure, include_paths=["**/insecure*.py:3"])
self._assert_expected(files, expected)

def test_match_excluded_line_and_glob(self, dir_structure):
expected = ["empty_for_testing.py", "insecure_random.py", "make_request.py"]
files = match_files(dir_structure, exclude_paths=["insecure*.py:3"])
files = match_files(
dir_structure, exclude_paths=["**/tests/**", "**/insecure*.py:3"]
)
self._assert_expected(files, expected)

def test_match_excluded_dir_incorrect_glob(self, dir_structure):
incorrect_glob = "more_samples"
expected = ["empty_for_testing.py", "insecure_random.py", "make_request.py"]
files = match_files(dir_structure, [incorrect_glob])
files = match_files(dir_structure, ["**/tests/**", incorrect_glob])
self._assert_expected(files, expected)

def test_match_excluded_dir_correct_glob(self, dir_structure):
correct_globs = ["**/more_samples/**", "*/more_samples/*"]
for correct_glob in correct_globs:
expected = ["insecure_random.py", "make_request.py"]
files = match_files(dir_structure, [correct_glob])
files = match_files(dir_structure, ["**/tests/**", correct_glob])
self._assert_expected(files, expected)

def test_match_excluded_multiple(self, dir_structure):
expected = ["insecure_random.py"]
files = match_files(dir_structure, ["*request.py", "*empty*"])
files = match_files(dir_structure, ["**/tests/**", "*request.py", "*empty*"])
self._assert_expected(files, expected)

def test_match_included(self, dir_structure):
Expand All @@ -87,11 +96,18 @@ def test_match_excluded_precedence_over_included(self, dir_structure):
expected = ["empty_for_testing.py", "insecure_random.py"]
files = match_files(
dir_structure,
exclude_paths=["*request.py"],
exclude_paths=["**/tests/**", "*request.py"],
include_paths=["*request.py", "*empty*.py", "*random.py"],
)
self._assert_expected(files, expected)

def test_test_directory_not_excluded(self, dir_structure):
expected = ["test_insecure_random.py", "test_make_request.py"]
files = match_files(
dir_structure, exclude_paths=["**/samples/**", "**/more_samples/**"]
)
self._assert_expected(files, expected)

def test_extract_line_from_pattern(self):
lines = file_line_patterns(Path("insecure_random.py"), ["insecure_*.py:3"])
assert lines == [3]

0 comments on commit 722f9c7

Please sign in to comment.