Skip to content

Commit

Permalink
Skip symlinks for analysis (#611)
Browse files (browse the repository at this point in the history)
* Exclude symlinks from analysis

* Attempt to capture Semgrep SARIF upon failure

* Add unit test for symlink exclusion
  • Loading branch information
drdavella authored May 31, 2024
1 parent 9679b51 commit adcca27
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 5 deletions.
14 changes: 9 additions & 5 deletions src/codemodder/codemodder.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,15 @@ def run(original_args) -> int:
log_list(logging.INFO, "including paths", included_paths)
log_list(logging.INFO, "excluding paths", argv.path_exclude)

files_to_analyze: list[Path] = match_files(
context.directory,
argv.path_exclude,
included_paths,
)
files_to_analyze: list[Path] = [
path
for path in match_files(
context.directory,
argv.path_exclude,
included_paths,
)
if path.is_file() and not path.is_symlink()
]

full_names = [str(path) for path in files_to_analyze]
log_list(logging.DEBUG, "matched files", full_names)
Expand Down
5 changes: 5 additions & 0 deletions src/codemodder/semgrep.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ def run(
if call.returncode != 0:
if not execution_context.verbose:
logger.error("captured semgrep stderr: %s", call.stderr)
try:
logger.error("semgrep sarif output: %s", temp_sarif_file.read())
except Exception as e:
logger.error("failed to read semgrep sarif output: %s", e)

raise subprocess.CalledProcessError(call.returncode, command)
# semgrep prepends the folders into the rule-id, we want the base name only
results = InternalSemgrepResultSet.from_sarif(
Expand Down
15 changes: 15 additions & 0 deletions tests/test_codemodder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from codemodder.diff import create_diff_from_tree
from codemodder.registry import load_registered_codemods
from codemodder.result import ResultSet
from codemodder.semgrep import run as semgrep_run


@pytest.fixture(autouse=True, scope="module")
Expand Down Expand Up @@ -74,6 +75,20 @@ def test_no_files_matched(self, mock_parse, dir_structure):
mock_parse.assert_not_called()
assert codetf.exists()

def test_skip_symlinks(self, mocker, dir_structure):
    """Run the ``url-sandbox`` codemod on a directory that contains a symlink.

    A ``symlink.py`` entry pointing at ``test_request.py`` is created inside
    the fixture directory, and the run is expected to finish with exit
    code 0.
    """
    # Override fixture for this specific test case: put the ``run`` function
    # imported from ``codemodder.semgrep`` (as ``semgrep_run``) at the patch
    # target. NOTE(review): presumably the module-level autouse fixture
    # replaces semgrep for the other tests -- confirm against that fixture.
    mocker.patch("codemodder.codemods.semgrep.semgrep_run", semgrep_run)

    source_dir, report_path = dir_structure

    # Add a symlink next to the real source file it points to.
    link_path = source_dir / "symlink.py"
    link_path.symlink_to(source_dir / "test_request.py")

    exit_code = run(
        [
            str(source_dir),
            "--output",
            str(report_path),
            "--codemod-include=url-sandbox",
        ]
    )
    assert exit_code == 0

@mock.patch("libcst.parse_module", side_effect=Exception)
@mock.patch("codemodder.codetf.CodeTF.build")
def test_cst_parsing_fails(self, build_report, mock_parse, dir_structure):
Expand Down

0 comments on commit adcca27

Please sign in to comment.