-
Notifications
You must be signed in to change notification settings - Fork 10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Pass include/exclude args to semgrep run #80
Changes from all commits
482c5ec
6279bc0
2b8c7c5
71d9d69
396ee70
3ce7a3b
1817467
791a81b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,13 +36,26 @@ def file_line_patterns(file_path: str | Path, patterns: Sequence[str]): | |
] | ||
|
||
|
||
def filter_files(names: Sequence[str], patterns: Sequence[str], exclude: bool = False): | ||
def filter_files( | ||
names: Sequence[str], | ||
parent_path: str, | ||
patterns: Sequence[str], | ||
exclude: bool = False, | ||
): | ||
patterns = ( | ||
[x.split(":")[0] for x in (patterns or [])] | ||
if not exclude | ||
# An excluded line should not cause the entire file to be excluded | ||
else [x for x in (patterns or []) if ":" not in x] | ||
) | ||
|
||
# TODO: handle case when parent path is "." | ||
patterns = [ | ||
str(Path(parent_path) / Path(pat)) | ||
if not pat.startswith("*") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This condition doesn't make sense to me. Do tests fail without it? This means that the behavior is going to be entirely dependent on the presence or absence of a trailing slash in `parent_path`. I'm wondering whether removing this also correctly handles the `.` case. |
||
else parent_path + pat | ||
for pat in patterns | ||
] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd like to combine these and avoid doing multiple list comprehension operations, but it was honestly easier to iterate on and easier to understand with them separated. We can refactor later on with a generator or something. Also, I don't feel super confident with this. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Suggestion:
There are two caveats: (1) `resolve` will also make it absolute, so we may need to handle the current directory carefully; (2) `glob` will return actual files in the filesystem, which is slower than `fnmatch.filter`. |
||
return itertools.chain(*[fnmatch.filter(names, pattern) for pattern in patterns]) | ||
|
||
|
||
|
@@ -65,15 +78,18 @@ def match_files( | |
that match the criteria of both exclude and include patterns. | ||
""" | ||
all_files = [str(path) for path in Path(parent_path).rglob("*")] | ||
|
||
included_files = set( | ||
filter_files( | ||
all_files, | ||
str(parent_path), | ||
include_paths if include_paths is not None else DEFAULT_INCLUDED_PATHS, | ||
) | ||
) | ||
excluded_files = set( | ||
filter_files( | ||
all_files, | ||
str(parent_path), | ||
exclude_paths if exclude_paths is not None else DEFAULT_EXCLUDED_PATHS, | ||
exclude=True, | ||
) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,24 @@ def run(execution_context: CodemodExecutionContext, yaml_files: List[Path]) -> d | |
"-o", | ||
temp_sarif_file.name, | ||
] | ||
|
||
if execution_context.path_exclude: | ||
command.extend( | ||
itertools.chain.from_iterable( | ||
map( | ||
lambda f: ["--exclude", f"{execution_context.directory}{f}"], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It feels like there should be a path separator here. |
||
execution_context.path_exclude, | ||
) | ||
) | ||
) | ||
if execution_context.path_include: | ||
# Note: parent path is not passed with --include | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't understand why this should be the case. |
||
command.extend( | ||
itertools.chain.from_iterable( | ||
map(lambda f: ["--include", str(f)], execution_context.path_include) | ||
) | ||
) | ||
|
||
command.extend( | ||
itertools.chain.from_iterable( | ||
map(lambda f: ["--config", str(f)], yaml_files) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,7 +23,11 @@ def dir_structure(tmp_path_factory): | |
(tests_dir / "test_make_request.py").touch() | ||
(tests_dir / "test_insecure_random.py").touch() | ||
|
||
assert len(list(base_dir.rglob("*"))) == 9 | ||
sub_tests_dir = tests_dir / "tests" | ||
sub_tests_dir.mkdir() | ||
(sub_tests_dir / "something.py").touch() | ||
|
||
assert len(list(base_dir.rglob("*"))) == 11 | ||
|
||
return base_dir | ||
|
||
|
@@ -35,11 +39,18 @@ def _assert_expected(self, result_files, expected_files): | |
file_names.sort() | ||
assert file_names == expected_files | ||
|
||
def test_all_py_files_match(self, dir_structure): | ||
def test_all_py_files_match_except_tests_dir(self, dir_structure): | ||
expected = ["empty_for_testing.py", "insecure_random.py", "make_request.py"] | ||
files = match_files(dir_structure) | ||
self._assert_expected(files, expected) | ||
|
||
def test_tests_not_excluded(self, dir_structure): | ||
expected = ["test_insecure_random.py", "test_make_request.py"] | ||
# anything in foo/tests will be analyzed but anything in | ||
# foo/tests/tests will not be analyzed by default | ||
files = match_files(dir_structure / "tests") | ||
self._assert_expected(files, expected) | ||
|
||
def test_match_excluded(self, dir_structure): | ||
expected = ["empty_for_testing.py", "insecure_random.py"] | ||
files = match_files(dir_structure, ["**/tests/**", "*request.py"]) | ||
|
@@ -102,40 +113,36 @@ def test_match_excluded_precedence_over_included(self, dir_structure): | |
self._assert_expected(files, expected) | ||
|
||
def test_test_directory_not_excluded(self, dir_structure): | ||
expected = ["test_insecure_random.py", "test_make_request.py"] | ||
expected = ["something.py", "test_insecure_random.py", "test_make_request.py"] | ||
files = match_files( | ||
dir_structure, exclude_paths=["**/samples/**", "**/more_samples/**"] | ||
) | ||
self._assert_expected(files, expected) | ||
|
||
def test_include_test_overridden_by_default_excludes(self, mocker): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These tests are using parent_dir as "." which does not work as expected. We will revisit. |
||
mocker.patch( | ||
"codemodder.code_directory.Path.rglob", | ||
return_value=[ | ||
"foo/tests/test_insecure_random.py", | ||
"foo/tests/test_make_request.py", | ||
], | ||
) | ||
mocker.patch( | ||
"codemodder.code_directory.Path.is_file", | ||
return_value=True, | ||
) | ||
files = match_files(Path("."), include_paths=["**/tests/**"]) | ||
def test_include_test_overridden_by_default_excludes(self, dir_structure): | ||
files = match_files(dir_structure, include_paths=["**/tests/**"]) | ||
self._assert_expected(files, []) | ||
|
||
def test_include_test_without_default_includes(self, mocker): | ||
files = ["foo/tests/test_insecure_random.py", "foo/tests/test_make_request.py"] | ||
mocker.patch( | ||
"codemodder.code_directory.Path.rglob", | ||
return_value=files, | ||
) | ||
mocker.patch( | ||
"codemodder.code_directory.Path.is_file", | ||
return_value=True, | ||
) | ||
result = match_files(Path("."), exclude_paths=[]) | ||
assert result == [Path(x) for x in files] | ||
def test_include_test_without_default_excludes(self, dir_structure): | ||
expected = [ | ||
"empty_for_testing.py", | ||
"insecure_random.py", | ||
"make_request.py", | ||
"something.py", | ||
"test_insecure_random.py", | ||
"test_make_request.py", | ||
] | ||
files = match_files(dir_structure, exclude_paths=[]) | ||
self._assert_expected(files, expected) | ||
|
||
def test_extract_line_from_pattern(self): | ||
lines = file_line_patterns(Path("insecure_random.py"), ["insecure_*.py:3"]) | ||
assert lines == [3] | ||
|
||
def test_include_specific_file(self, dir_structure): | ||
expected = ["empty_for_testing.py"] | ||
files = match_files( | ||
dir_structure / "samples" / "more_samples", | ||
include_paths=["empty_for_testing.py"], | ||
) | ||
self._assert_expected(files, expected) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is probably faster with
os.path.join