From adcca271596884f8a0f514108282bf943caaffa0 Mon Sep 17 00:00:00 2001 From: Dan D'Avella Date: Fri, 31 May 2024 12:31:05 -0400 Subject: [PATCH] Skip symlinks for analysis (#611) * Exclude symlinks from analysis * Attempt to capture Semgrep SARIF upon failure * Add unit test for symlink exclusion --- src/codemodder/codemodder.py | 14 +++++++++----- src/codemodder/semgrep.py | 5 +++++ tests/test_codemodder.py | 15 +++++++++++++++ 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/codemodder/codemodder.py b/src/codemodder/codemodder.py index af11bf0a..eb6a15ef 100644 --- a/src/codemodder/codemodder.py +++ b/src/codemodder/codemodder.py @@ -199,11 +199,15 @@ def run(original_args) -> int: log_list(logging.INFO, "including paths", included_paths) log_list(logging.INFO, "excluding paths", argv.path_exclude) - files_to_analyze: list[Path] = match_files( - context.directory, - argv.path_exclude, - included_paths, - ) + files_to_analyze: list[Path] = [ + path + for path in match_files( + context.directory, + argv.path_exclude, + included_paths, + ) + if path.is_file() and not path.is_symlink() + ] full_names = [str(path) for path in files_to_analyze] log_list(logging.DEBUG, "matched files", full_names) diff --git a/src/codemodder/semgrep.py b/src/codemodder/semgrep.py index a2a21f36..d92b79ca 100644 --- a/src/codemodder/semgrep.py +++ b/src/codemodder/semgrep.py @@ -110,6 +110,11 @@ def run( if call.returncode != 0: if not execution_context.verbose: logger.error("captured semgrep stderr: %s", call.stderr) + try: + logger.error("semgrep sarif output: %s", temp_sarif_file.read()) + except Exception as e: + logger.error("failed to read semgrep sarif output: %s", e) + raise subprocess.CalledProcessError(call.returncode, command) # semgrep prepends the folders into the rule-id, we want the base name only results = InternalSemgrepResultSet.from_sarif( diff --git a/tests/test_codemodder.py b/tests/test_codemodder.py index 32aa8172..e78b65a9 100644 --- a/tests/test_codemodder.py +++ b/tests/test_codemodder.py @@ -8,6 +8,7 @@ from codemodder.diff import create_diff_from_tree from codemodder.registry import load_registered_codemods from codemodder.result import ResultSet +from codemodder.semgrep import run as semgrep_run @pytest.fixture(autouse=True, scope="module") @@ -74,6 +75,20 @@ def test_no_files_matched(self, mock_parse, dir_structure): mock_parse.assert_not_called() assert codetf.exists() + def test_skip_symlinks(self, mocker, dir_structure): + # Override fixture for this specific test case + mocker.patch("codemodder.codemods.semgrep.semgrep_run", semgrep_run) + code_dir, codetf = dir_structure + (code_dir / "symlink.py").symlink_to(code_dir / "test_request.py") + args = [ + str(code_dir), + "--output", + str(codetf), + "--codemod-include=url-sandbox", + ] + res = run(args) + assert res == 0 + @mock.patch("libcst.parse_module", side_effect=Exception) @mock.patch("codemodder.codetf.CodeTF.build") def test_cst_parsing_fails(self, build_report, mock_parse, dir_structure):