Skip to content

Commit

Permalink
Codemodder can handle non-existent requested codemods (#384)
Browse files Browse the repository at this point in the history
* move check for include/exclude to match_codemods

* fix unit tests

* fix exclude tests according to current behavior

* add codemodder tests and reduce test time

* use default dict
  • Loading branch information
clavedeluna authored Mar 19, 2024
1 parent 83a561d commit 9ec75cf
Show file tree
Hide file tree
Showing 4 changed files with 192 additions and 91 deletions.
36 changes: 2 additions & 34 deletions src/codemodder/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,38 +70,8 @@ class CsvListAction(argparse.Action):
def __call__(self, parser, namespace, values, option_string=None):
# Conversion to dict removes duplicates while preserving order
items = list(dict.fromkeys(values.split(",")).keys())
self.validate_items(items)
setattr(namespace, self.dest, items)

def validate_items(self, items):
"""Basic Action does not validate the items"""


def build_codemod_validator(codemod_registry: CodemodRegistry):
names = codemod_registry.names
ids = codemod_registry.ids

class ValidatedCodmods(CsvListAction):
"""
argparse Action to convert "codemod1,codemod2,codemod3" into a list
representation and validate against existing codemods
"""

def validate_items(self, items):
potential_names = ids + names

if unrecognized_codemods := [
name for name in items if name not in potential_names
]:
args = {
"values": unrecognized_codemods,
"choices": ", ".join(map(repr, names)),
}
msg = "invalid choice(s): %(values)r (choose from %(choices)s)"
raise argparse.ArgumentError(self, msg % args)

return ValidatedCodmods


def parse_args(argv, codemod_registry: CodemodRegistry):
"""
Expand All @@ -117,17 +87,15 @@ def parse_args(argv, codemod_registry: CodemodRegistry):
help="name of output file to produce",
)

codemod_validator = build_codemod_validator(codemod_registry)

codemod_args_group = parser.add_mutually_exclusive_group()
codemod_args_group.add_argument(
"--codemod-exclude",
action=codemod_validator,
action=CsvListAction,
help="Comma-separated set of codemod ID(s) to exclude",
)
codemod_args_group.add_argument(
"--codemod-include",
action=codemod_validator,
action=CsvListAction,
help="Comma-separated set of codemod ID(s) to include",
)

Expand Down
49 changes: 32 additions & 17 deletions src/codemodder/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,25 +61,40 @@ def match_codemods(
) -> list[BaseCodemod]:
codemod_include = codemod_include or []
codemod_exclude = codemod_exclude or DEFAULT_EXCLUDED_CODEMODS
base_list = [
codemod
for codemod in self.codemods
if (sast_only and codemod.origin != "pixee")
or (not sast_only and codemod.origin == "pixee")
]

if codemod_exclude and not codemod_include:
return [
codemod
for codemod in base_list
if codemod.name not in codemod_exclude
and codemod.id not in codemod_exclude
]

return [
self._codemods_by_name.get(name) or self._codemods_by_id[name]
for name in codemod_include
]
base_codemods = {}
for codemod in self.codemods:
if (sast_only and codemod.origin != "pixee") or (
not sast_only and codemod.origin == "pixee"
):
base_codemods[codemod.id] = codemod
base_codemods[codemod.name] = codemod

for name_or_id in codemod_exclude:
try:
codemod = base_codemods[name_or_id]
except KeyError:
logger.warning(
f"Requested codemod to exclude'{name_or_id}' does not exist."
)
continue

# remove both by name and id since we don't know which `name_or_id` represented
base_codemods.pop(codemod.name, None)
base_codemods.pop(codemod.id, None)
# Remove duplicates and preserve order
return list(dict.fromkeys(base_codemods.values()))

matched_codemods = []
for name in codemod_include:
try:
matched_codemods.append(
self._codemods_by_name.get(name) or self._codemods_by_id[name]
)
except KeyError:
logger.warning(f"Requested codemod to include'{name}' does not exist.")
return matched_codemods

def describe_codemods(
self,
Expand Down
40 changes: 31 additions & 9 deletions tests/codemods/test_include_exclude.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,20 @@ class TestMatchCodemods:
def setup_class(cls):
cls.registry = load_registered_codemods()
cls.codemod_map = cls.registry._codemods_by_name
cls.default_ids = [
cls.all_ids = [
c().id if isinstance(c, type) else c.id for c in registry.codemods
]

def test_no_include_exclude(self):
default_codemods = set(
assert set(self.registry.match_codemods(None, None)) == set(
x
for x in self.registry.codemods
if x.id in self.default_ids and x.id not in DEFAULT_EXCLUDED_CODEMODS
if x.id in self.all_ids and x.id not in DEFAULT_EXCLUDED_CODEMODS
)
assert set(self.registry.match_codemods(None, None)) == default_codemods

def test_load_sast_codemods(self):
sast_codemods = set(
c for c in self.registry.codemods if c.id not in self.default_ids
c for c in self.registry.codemods if c.id not in self.all_ids
)
assert (
set(self.registry.match_codemods(None, None, sast_only=True))
Expand All @@ -36,12 +35,13 @@ def test_include_non_sast_in_sast(self):
) == {self.codemod_map["secure-random"]}

@pytest.mark.parametrize(
"input_str", ["secure-random", "secure-random,url-sandbox"]
"input_str",
["secure-random", "pixee:python/secure-random", "secure-random,url-sandbox"],
)
def test_include(self, input_str):
includes = input_str.split(",")
assert self.registry.match_codemods(includes, None) == [
self.codemod_map[name] for name in includes
self.codemod_map[name.replace("pixee:python/", "")] for name in includes
]

@pytest.mark.parametrize(
Expand All @@ -57,13 +57,35 @@ def test_include_preserve_order(self, input_str):
"input_str",
[
"secure-random",
"pixee:python/secure-random",
"secure-random,url-sandbox",
],
)
def test_exclude(self, input_str):
excludes = input_str.split(",")
assert self.registry.match_codemods(None, excludes) == [
res = [
c
for c in self.registry.codemods
if c.id in self.all_ids and c.name not in excludes and c.id not in excludes
]
assert self.registry.match_codemods(None, excludes) == res

def test_bad_codemod_include_no_match(self):
assert self.registry.match_codemods(["doesntexist"], None) == []

def test_codemod_include_some_match(self):
assert self.registry.match_codemods(["doesntexist", "secure-random"], None) == [
self.codemod_map["secure-random"]
]

def test_bad_codemod_exclude_all_match(self):
assert self.registry.match_codemods(None, ["doesntexist"]) == [
c for c in self.registry.codemods if c.id in self.all_ids
]

def test_exclude_some_match(self):
assert self.registry.match_codemods(None, ["doesntexist", "secure-random"]) == [
c
for c in self.registry.codemods
if c.name not in excludes and c.id in self.default_ids
if c.name not in "secure-random" and c.id in self.all_ids
]
158 changes: 127 additions & 31 deletions tests/test_codemodder.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,18 @@ def disable_write_report():
"""Override fixture from conftest.py"""


@pytest.fixture(autouse=True)
def disable_codemod_apply(mocker, request):
"""
The tests in this module are like integration tests in that they often
run all of codemodder but we most often don't need to actually apply codemods.
"""
# Skip mocking only for specific tests that need to apply codemods.
if request.function.__name__ == "test_cst_parsing_fails":
return
mocker.patch("codemodder.codemods.base_codemod.BaseCodemod.apply")


class TestRun:
@mock.patch("libcst.parse_module")
def test_no_files_matched(self, mock_parse, tmpdir):
Expand Down Expand Up @@ -113,40 +125,138 @@ def test_reporting(self, mock_reporting, dry_run):

mock_reporting.return_value.write_report.assert_called_once()

@mock.patch("codemodder.codemods.semgrep.semgrep_run")
def test_no_codemods_to_run(self, mock_semgrep_run, tmpdir):
codetf = tmpdir / "result.codetf"
assert not codetf.exists()

registry = load_registered_codemods()
names = ",".join(registry.names)
@pytest.mark.parametrize("codemod", ["secure-random", "pixee:python/secure-random"])
@mock.patch("codemodder.context.CodemodExecutionContext.compile_results")
@mock.patch("codemodder.codetf.CodeTF.write_report")
def test_run_codemod_name_or_id(self, write_report, mock_compile_results, codemod):
del write_report
args = [
"tests/samples/",
"--output",
str(codetf),
f"--codemod-exclude={names}",
"here.txt",
f"--codemod-include={codemod}",
]

exit_code = run(args)
assert exit_code == 0
mock_semgrep_run.assert_not_called()
assert codetf.exists()
# todo: if no codemods run do we still compile results?
mock_compile_results.assert_called()

@pytest.mark.parametrize("codemod", ["secure-random", "pixee:python/secure-random"])
@mock.patch("codemodder.context.CodemodExecutionContext.compile_results")

class TestCodemodIncludeExclude:

@mock.patch("codemodder.registry.logger.warning")
@mock.patch("codemodder.codemodder.logger.info")
@mock.patch("codemodder.codetf.CodeTF.write_report")
def test_run_codemod_name_or_id(self, write_report, mock_compile_results, codemod):
del write_report
def test_codemod_include_no_match(self, write_report, info_logger, warning_logger):
bad_codemod = "doesntexist"
args = [
"tests/samples/",
"--output",
"here.txt",
f"--codemod-include={codemod}",
f"--codemod-include={bad_codemod}",
]
run(args)
write_report.assert_called_once()

assert any("no codemods to run" in x[0][0] for x in info_logger.call_args_list)
assert any(
f"Requested codemod to include'{bad_codemod}' does not exist." in x[0][0]
for x in warning_logger.call_args_list
)

@mock.patch("codemodder.registry.logger.warning")
@mock.patch("codemodder.codemodder.logger.info")
@mock.patch("codemodder.codetf.CodeTF.write_report")
def test_codemod_include_some_match(
self, write_report, info_logger, warning_logger
):
bad_codemod = "doesntexist"
good_codemod = "secure-random"
args = [
"tests/samples/",
"--output",
"here.txt",
f"--codemod-include={bad_codemod},{good_codemod}",
]
run(args)
write_report.assert_called_once()
assert any("running codemod %s" in x[0][0] for x in info_logger.call_args_list)
assert any(
f"Requested codemod to include'{bad_codemod}' does not exist." in x[0][0]
for x in warning_logger.call_args_list
)

@mock.patch("codemodder.registry.logger.warning")
@mock.patch("codemodder.codemodder.logger.info")
@mock.patch("codemodder.codetf.CodeTF.write_report")
def test_codemod_exclude_some_match(
self, write_report, info_logger, warning_logger
):
bad_codemod = "doesntexist"
good_codemod = "secure-random"
args = [
"tests/samples/",
"--output",
"here.txt",
f"--codemod-exclude={bad_codemod},{good_codemod}",
]
run(args)
write_report.assert_called_once()
codemods_that_ran = [
x[0][1]
for x in info_logger.call_args_list
if x[0][0] == "running codemod %s"
]

assert f"pixee:python/{good_codemod}" not in codemods_that_ran
assert any("running codemod %s" in x[0][0] for x in info_logger.call_args_list)
assert any(
f"Requested codemod to exclude'{bad_codemod}' does not exist." in x[0][0]
for x in warning_logger.call_args_list
)

@mock.patch("codemodder.registry.logger.warning")
@mock.patch("codemodder.codemodder.logger.info")
@mock.patch("codemodder.codetf.CodeTF.write_report")
@mock.patch("codemodder.codemods.base_codemod.BaseCodemod.apply")
def test_codemod_exclude_no_match(
self, apply, write_report, info_logger, warning_logger
):
bad_codemod = "doesntexist"
args = [
"tests/samples/",
"--output",
"here.txt",
f"--codemod-exclude={bad_codemod}",
]

run(args)
write_report.assert_called_once()
assert any("running codemod %s" in x[0][0] for x in info_logger.call_args_list)
assert any(
f"Requested codemod to exclude'{bad_codemod}' does not exist." in x[0][0]
for x in warning_logger.call_args_list
)

@mock.patch("codemodder.codemods.semgrep.semgrep_run")
def test_exclude_all_registered_codemods(self, mock_semgrep_run, tmpdir):
codetf = tmpdir / "result.codetf"
assert not codetf.exists()

registry = load_registered_codemods()
names = ",".join(registry.names)
args = [
"tests/samples/",
"--output",
str(codetf),
f"--codemod-exclude={names}",
]

exit_code = run(args)
assert exit_code == 0
mock_compile_results.assert_called()
mock_semgrep_run.assert_not_called()
assert codetf.exists()


class TestExitCode:
Expand Down Expand Up @@ -194,20 +304,6 @@ def test_conflicting_include_exclude(self, mock_report):
run(args)
assert err.value.args[0] == 3

@mock.patch("codemodder.codetf.CodeTF.write_report")
def test_bad_codemod_name(self, mock_report):
del mock_report
bad_codemod = "doesntexist"
args = [
"tests/samples/",
"--output",
"here.txt",
f"--codemod-include={bad_codemod}",
]
with pytest.raises(SystemExit) as err:
run(args)
assert err.value.args[0] == 3

def test_find_semgrep_results(self, mocker):
run_semgrep = mocker.patch("codemodder.codemodder.run_semgrep")
codemods = load_registered_codemods()
Expand Down

0 comments on commit 9ec75cf

Please sign in to comment.