From 658bc919da6b3ee1ad89fc4a63e40556c1329da7 Mon Sep 17 00:00:00 2001 From: Dan D'Avella Date: Tue, 12 Mar 2024 10:04:25 -0400 Subject: [PATCH] Use Pydantic to generate and validate CodeTF data models (#357) * Implement CodeTF data model using Pydantic * Rename codemodder.change => codemodder.codetf * Implement JSON schema validation for CodeTF results --------- Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 1 + integration_tests/conftest.py | 11 ++ pyproject.toml | 3 + src/codemodder/change.py | 71 --------- src/codemodder/codemodder.py | 12 +- src/codemodder/codemods/base_codemod.py | 15 +- src/codemodder/codemods/base_transformer.py | 2 +- .../codemods/imported_call_modifier.py | 4 +- src/codemodder/codemods/libcst_transformer.py | 11 +- .../codemods/test/integration_utils.py | 16 +- src/codemodder/codetf.py | 137 ++++++++++++++++++ src/codemodder/context.py | 29 ++-- .../base_dependency_writer.py | 8 +- .../dependency_manager.py | 2 +- .../dependency_management/pyproject_writer.py | 6 +- .../requirements_txt_writer.py | 6 +- .../dependency_management/setup_py_writer.py | 6 +- .../dependency_management/setupcfg_writer.py | 6 +- src/codemodder/file_context.py | 2 +- src/codemodder/report/__init__.py | 0 src/codemodder/report/codetf_reporter.py | 51 ------- src/core_codemods/file_resource_leak.py | 7 +- src/core_codemods/fix_assert_tuple.py | 2 +- src/core_codemods/order_imports.py | 9 +- src/core_codemods/remove_unused_imports.py | 5 +- .../secure_flask_session_config.py | 7 +- src/core_codemods/sql_parameterization.py | 6 +- src/core_codemods/url_sandbox.py | 12 +- tests/conftest.py | 5 +- .../test_base_dependency_writer.py | 2 +- .../test_dependency_manager.py | 2 +- .../test_pyproject_writer.py | 2 +- .../test_requirements_txt_writer.py | 2 +- .../test_setup_py_writer.py | 2 +- .../test_setupcfgt_writer.py | 2 +- tests/report/__init__.py | 0 tests/report/test_codetf_reporter.py | 31 ---- tests/test_change.py | 39 ----- tests/test_codemodder.py | 36 ++--- tests/test_codetf.py | 113 +++++++++++++++ tests/test_context.py | 14 ++ 41 files changed, 396 insertions(+), 301 deletions(-) delete mode 100644 src/codemodder/change.py create mode 100644 src/codemodder/codetf.py delete mode 100644 src/codemodder/report/__init__.py delete mode 100644 src/codemodder/report/codetf_reporter.py delete mode 100644 tests/report/__init__.py delete mode 100644 tests/report/test_codetf_reporter.py delete mode 100644 tests/test_change.py create mode 100644 tests/test_codetf.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 07330be0..b9e79e57 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,6 +34,7 @@ repos: args: [--disable-error-code=has-type,--disable-error-code=import-not-found] additional_dependencies: [ + "types-jsonschema~=4.21.0", "types-mock==5.0.*", "types-PyYAML==6.0", "types-toml~=0.10", diff --git a/integration_tests/conftest.py b/integration_tests/conftest.py index e69de29b..3f4f5fb2 100644 --- a/integration_tests/conftest.py +++ b/integration_tests/conftest.py @@ -0,0 +1,11 @@ +import json + +import pytest +import requests + + +@pytest.fixture(scope="module") +def codetf_schema(): + schema_path = "https://raw.githubusercontent.com/pixee/codemodder-specs/main/codetf.schema.json" + response = requests.get(schema_path) + yield json.loads(response.text) diff --git a/pyproject.toml b/pyproject.toml index b2d10990..1eb33690 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "isort>=5.12,<5.14", "libcst>=1.1,<1.3", "packaging>=23.2,<25.0", + "pydantic~=2.6.0", "pylint>=3.0,<3.2", "python-json-logger~=2.0.0", "PyYAML~=6.0.0", @@ -50,6 +51,7 @@ test = [ "coverage>=7.3,<7.5", "Flask<4", "Jinja2~=3.1.2", + "jsonschema~=4.21.0", "lxml>=4.9.3,<5.2.0", "mock==5.1.*", "pre-commit<4", @@ -59,6 +61,7 @@ test = [ "pytest-mock~=3.12.0", "pytest-randomly==3.*", "pytest-xdist==3.*", + "requests~=2.31.0", "security~=1.2.0", "types-mock==5.1.*", "django>=4,<6", diff --git a/src/codemodder/change.py b/src/codemodder/change.py deleted file mode 100644 index 25c012d7..00000000 --- a/src/codemodder/change.py +++ /dev/null @@ -1,71 +0,0 @@ -from dataclasses import dataclass, field -from enum import Enum - - -class Action(Enum): - ADD = "add" - REMOVE = "remove" - - -class Result(Enum): - COMPLETED = "completed" - FAILED = "failed" - SKIPPED = "skipped" - - -class DiffSide(Enum): - LEFT = "left" - RIGHT = "right" - - -@dataclass -class PackageAction: - action: Action - result: Result - package: str - - def to_json(self): - return { - "action": self.action.value.upper(), - "result": self.result.value.upper(), - "package": self.package, - } - - -@dataclass -class Change: - lineNumber: int - description: str - # All of our changes are currently treated as additive, so it makes sense - # for the comments to appear on the RIGHT side of the split diff. Eventually we - # may want to differentiate between LEFT and RIGHT, but for now we'll just - # default to RIGHT. - diffSide: DiffSide = field(default=DiffSide.RIGHT) - properties: dict = field(default_factory=dict) - packageActions: list[PackageAction] = field(default_factory=list) - - def to_json(self): - return { - # Not sure why this is a string but it's in the spec - "lineNumber": str(self.lineNumber), - "description": self.description, - "properties": self.properties, - "diffSide": self.diffSide.value.lower(), - "packageActions": [pa.to_json() for pa in self.packageActions], - } - - -@dataclass -class ChangeSet: - """A set of changes made to a file at `path`""" - - path: str - diff: str - changes: list[Change] - - def to_json(self): - return { - "path": self.path, - "diff": self.diff, - "changes": [x.to_json() for x in self.changes], - } diff --git a/src/codemodder/codemodder.py b/src/codemodder/codemodder.py index 4776a939..87363164 100644 --- a/src/codemodder/codemodder.py +++ b/src/codemodder/codemodder.py @@ -11,12 +11,12 @@ from codemodder.code_directory import match_files from codemodder.codemods.api import BaseCodemod from codemodder.codemods.semgrep import SemgrepRuleDetector +from codemodder.codetf import CodeTF from codemodder.context import CodemodExecutionContext from codemodder.dependency import Dependency from codemodder.logging import configure_logger, log_list, log_section, logger from codemodder.project_analysis.file_parsers.package_store import PackageStore from codemodder.project_analysis.python_repo_manager import PythonRepoManager -from codemodder.report.codetf_reporter import report_default from codemodder.result import ResultSet from codemodder.sarifs import detect_sarif_tools from codemodder.semgrep import run as run_semgrep @@ -210,13 +210,17 @@ def run(original_args) -> int: files_to_analyze, ) - results = context.compile_results(codemods_to_run) - elapsed = datetime.datetime.now() - start elapsed_ms = int(elapsed.total_seconds() * 1000) if argv.output: - report_default(elapsed_ms, argv, original_args, results) + codetf = CodeTF.build( + context, + elapsed_ms, + original_args, + context.compile_results(codemods_to_run), + ) + codetf.write_report(argv.output) log_report(context, argv, elapsed_ms, files_to_analyze) return 0 diff --git a/src/codemodder/codemods/base_codemod.py b/src/codemodder/codemods/base_codemod.py index e2398183..5b592012 100644 --- a/src/codemodder/codemods/base_codemod.py +++ b/src/codemodder/codemods/base_codemod.py @@ -11,6 +11,7 @@ from codemodder.code_directory import file_line_patterns from codemodder.codemods.base_detector import BaseDetector from codemodder.codemods.base_transformer import BaseTransformerPipeline +from codemodder.codetf import Reference from codemodder.context import CodemodExecutionContext from codemodder.file_context import FileContext from codemodder.logging import logger @@ -23,18 +24,6 @@ class ReviewGuidance(Enum): MERGE_WITHOUT_REVIEW = 3 -@dataclass -class Reference: - url: str - description: str = "" - - def to_json(self): - return { - "url": self.url, - "description": self.description or self.url, - } - - @dataclass class Metadata: name: str @@ -124,7 +113,7 @@ def describe(self): "codemod": self.id, "summary": self.summary, "description": self.description, - "references": [ref.to_json() for ref in self.references], + "references": [ref.model_dump() for ref in self.references], } def _apply( diff --git a/src/codemodder/codemods/base_transformer.py b/src/codemodder/codemods/base_transformer.py index fba16156..673af338 100644 --- a/src/codemodder/codemods/base_transformer.py +++ b/src/codemodder/codemods/base_transformer.py @@ -1,7 +1,7 @@ from abc import ABCMeta, abstractmethod -from codemodder.change import ChangeSet from codemodder.codemods.base_visitor import BaseTransformer +from codemodder.codetf import ChangeSet from codemodder.context import CodemodExecutionContext from codemodder.file_context import FileContext from codemodder.result import Result diff --git a/src/codemodder/codemods/imported_call_modifier.py b/src/codemodder/codemods/imported_call_modifier.py index f1566928..4d1a371a 100644 --- a/src/codemodder/codemods/imported_call_modifier.py +++ b/src/codemodder/codemods/imported_call_modifier.py @@ -6,9 +6,9 @@ from libcst.codemod import CodemodContext, VisitorBasedCodemodCommand from libcst.metadata import PositionProvider -from codemodder.change import Change from codemodder.codemods.base_visitor import UtilsMixin from codemodder.codemods.utils_mixin import NameResolutionMixin +from codemodder.codetf import Change from codemodder.file_context import FileContext from codemodder.result import Result @@ -75,7 +75,7 @@ def leave_Call(self, original_node: cst.Call, updated_node: cst.Call): and true_name in self.matching_functions ): self.changes_in_file.append( - Change(line_number, self.change_description) + Change(lineNumber=line_number, description=self.change_description) ) new_args = self.updated_args(updated_node.args) diff --git a/src/codemodder/codemods/libcst_transformer.py b/src/codemodder/codemods/libcst_transformer.py index 7df6c1a5..be50dc0d 100644 --- a/src/codemodder/codemods/libcst_transformer.py +++ b/src/codemodder/codemods/libcst_transformer.py @@ -1,4 +1,5 @@ from collections import namedtuple +from typing import cast import libcst as cst from libcst import matchers @@ -6,10 +7,10 @@ from libcst.codemod import CodemodContext from libcst.codemod.visitors import AddImportsVisitor, RemoveImportsVisitor -from codemodder.change import Change, ChangeSet from codemodder.codemods.base_transformer import BaseTransformerPipeline from codemodder.codemods.base_visitor import BaseTransformer from codemodder.codemods.utils import get_call_name +from codemodder.codetf import Change, ChangeSet from codemodder.context import CodemodExecutionContext from codemodder.dependency import Dependency from codemodder.diff import create_diff_from_tree @@ -99,7 +100,7 @@ def leave_ClassDef( def node_position(self, node): # See https://github.com/Instagram/LibCST/blob/main/libcst/_metadata_dependent.py#L112 - return self.get_metadata(self.METADATA_DEPENDENCIES[0], node) + return cast(CodeRange, self.get_metadata(self.METADATA_DEPENDENCIES[0], node)) def add_change(self, node, description: str, start: bool = True): position = self.node_position(node) @@ -125,7 +126,7 @@ def add_dependency(self, dependency: Dependency): def report_change(self, original_node): line_number = self.lineno_for_node(original_node) self.file_context.codemod_changes.append( - Change(line_number, self.change_description) + Change(lineNumber=line_number, description=self.change_description) ) def remove_unused_import(self, original_node): @@ -275,8 +276,8 @@ def apply( return None change_set = ChangeSet( - str(file_context.file_path.relative_to(context.directory)), - diff, + path=str(file_context.file_path.relative_to(context.directory)), + diff=diff, changes=file_context.codemod_changes, ) diff --git a/src/codemodder/codemods/test/integration_utils.py b/src/codemodder/codemods/test/integration_utils.py index 2895bde0..8deaefd0 100644 --- a/src/codemodder/codemods/test/integration_utils.py +++ b/src/codemodder/codemods/test/integration_utils.py @@ -6,6 +6,7 @@ from types import ModuleType import git +import jsonschema from codemodder import __version__, registry @@ -99,7 +100,8 @@ def _assert_results_fields(self, results, output_path): result = results[0] assert result["codemod"] == self.codemod_instance.id assert result["references"] == [ - ref.to_json() for ref in self.codemod_instance.references + ref.model_dump(exclude_none=True) + for ref in self.codemod_instance.references ] # TODO: once we add description for each url. @@ -118,15 +120,15 @@ def _assert_results_fields(self, results, output_path): assert len(change["changes"]) == self.num_changes line_change = change["changes"][0] - assert line_change["lineNumber"] == str(self.expected_line_change) + assert line_change["lineNumber"] == int(self.expected_line_change) assert line_change["description"] == self.change_description - assert line_change["packageActions"] == [] - assert line_change["properties"] == {} - def _assert_codetf_output(self): + def _assert_codetf_output(self, codetf_schema): with open(self.output_path, "r", encoding="utf-8") as f: codetf = json.load(f) + jsonschema.validate(codetf, codetf_schema) + assert sorted(codetf.keys()) == ["results", "run"] run = codetf["run"] self._assert_run_fields(run, self.output_path) @@ -149,7 +151,7 @@ def check_code_after(self) -> ModuleType: path=self.code_path, allowed_exceptions=self.allowed_exceptions ) - def test_file_rewritten(self): + def test_file_rewritten(self, codetf_schema): """ Tests that file is re-written correctly with new code and correct codetf output. @@ -183,7 +185,7 @@ def test_file_rewritten(self): self.check_code_after() self.check_dependencies_after() - self._assert_codetf_output() + self._assert_codetf_output(codetf_schema) pathlib.Path(self.output_path).unlink(missing_ok=True) self._run_idempotency_chec(command) diff --git a/src/codemodder/codetf.py b/src/codemodder/codetf.py new file mode 100644 index 00000000..ce9eb327 --- /dev/null +++ b/src/codemodder/codetf.py @@ -0,0 +1,137 @@ +""" +Data models for the CodeTF format. + +We need to keep this in sync with the CodeTF schema. +""" + +from __future__ import annotations + +import os +import sys +from enum import Enum +from typing import TYPE_CHECKING, Optional + +from pydantic import BaseModel, model_validator + +from codemodder import __version__ +from codemodder.logging import logger + +if TYPE_CHECKING: + from codemodder.context import CodemodExecutionContext + + +class Action(Enum): + ADD = "add" + REMOVE = "remove" + + +class PackageResult(Enum): + COMPLETED = "completed" + FAILED = "failed" + SKIPPED = "skipped" + + +class DiffSide(Enum): + LEFT = "left" + RIGHT = "right" + + +class PackageAction(BaseModel): + action: Action + result: PackageResult + package: str + + +class Change(BaseModel): + lineNumber: int + description: Optional[str] + # All of our changes are currently treated as additive, so it makes sense + # for the comments to appear on the RIGHT side of the split diff. Eventually we + # may want to differentiate between LEFT and RIGHT, but for now we'll just + # default to RIGHT. + diffSide: DiffSide = DiffSide.RIGHT + properties: Optional[dict] = None + packageActions: Optional[list[PackageAction]] = None + + +class ChangeSet(BaseModel): + """A set of changes made to a file at `path`""" + + path: str + diff: str + changes: list[Change] = [] + + +class Reference(BaseModel): + url: str + description: Optional[str] = None + + @model_validator(mode="after") + def validate_description(self): + self.description = self.description or self.url + return self + + +class Result(BaseModel): + codemod: str + summary: str + description: str + references: Optional[list[Reference]] = None + properties: Optional[dict] = None + failedFiles: Optional[list[str]] = None + changeset: list[ChangeSet] + + +class Sarif(BaseModel): + artifact: str + sha1: str + + +class Run(BaseModel): + vendor: str + tool: str + version: str + projectName: Optional[str] = None + commandLine: str + elapsed: Optional[int] + directory: str + sarifs: list[Sarif] = [] + + +class CodeTF(BaseModel): + run: Run + results: list[Result] + + @classmethod + def build( + cls, + context: CodemodExecutionContext, + elapsed_ms, + original_args, + results: list[Result], + ): + command_name = os.path.basename(sys.argv[0]) + command_args = " ".join(original_args) + run = Run( + vendor="pixee", + tool="codemodder-python", + version=__version__, + projectName=None, + commandLine=f"{command_name} {command_args}", + elapsed=elapsed_ms, + directory=str(context.directory.absolute()), + # TODO: this should be populated from the context + sarifs=[], + ) + return cls(run=run, results=results) + + def write_report(self, outfile): + try: + with open(outfile, "w", encoding="utf-8") as f: + f.write(self.model_dump_json(exclude_none=True)) + except Exception: + logger.exception("failed to write report file.") + # Any issues with writing the output file should exit status 2. + return 2 + logger.debug("wrote report to %s", outfile) + return 0 diff --git a/src/codemodder/context.py b/src/codemodder/context.py index 461b4b7a..4824d29a 100644 --- a/src/codemodder/context.py +++ b/src/codemodder/context.py @@ -6,7 +6,8 @@ from textwrap import indent from typing import TYPE_CHECKING, Iterator, List -from codemodder.change import ChangeSet +from codemodder.codetf import ChangeSet +from codemodder.codetf import Result as CodeTFResult from codemodder.dependency import ( Dependency, build_dependency_notification, @@ -150,22 +151,20 @@ def process_results(self, codemod_id: str, results: Iterator[FileContext]): self.add_dependencies(codemod_id, file_context.dependencies) self.timer.aggregate(file_context.timer) - def compile_results(self, codemods: list[BaseCodemod]): + def compile_results(self, codemods: list[BaseCodemod]) -> list[CodeTFResult]: results = [] for codemod in codemods: - data = { - "codemod": codemod.id, - "summary": codemod.summary, - "description": self.add_description(codemod), - "references": [ref.to_json() for ref in codemod.references], - "properties": {}, - "failedFiles": [str(file) for file in self.get_failures(codemod.id)], - "changeset": [ - change.to_json() for change in self.get_results(codemod.id) - ], - } - - results.append(data) + result = CodeTFResult( + codemod=codemod.id, + summary=codemod.summary, + description=self.add_description(codemod), + references=codemod.references, + properties={}, + failedFiles=[str(file) for file in self.get_failures(codemod.id)], + changeset=self.get_results(codemod.id), + ) + + results.append(result) return results diff --git a/src/codemodder/dependency_management/base_dependency_writer.py b/src/codemodder/dependency_management/base_dependency_writer.py index c9be50e2..8d1931a5 100644 --- a/src/codemodder/dependency_management/base_dependency_writer.py +++ b/src/codemodder/dependency_management/base_dependency_writer.py @@ -4,7 +4,7 @@ from packaging.requirements import Requirement -from codemodder.change import Action, Change, ChangeSet, PackageAction, Result +from codemodder.codetf import Action, Change, ChangeSet, PackageAction, PackageResult from codemodder.dependency import Dependency from codemodder.project_analysis.file_parsers.package_store import PackageStore @@ -61,7 +61,11 @@ def build_changes( "contextual_description_position": "right", }, packageActions=[ - PackageAction(Action.ADD, Result.COMPLETED, str(dep.requirement)) + PackageAction( + action=Action.ADD, + result=PackageResult.COMPLETED, + package=str(dep.requirement), + ) ], ) for i, dep in enumerate(dependencies) diff --git a/src/codemodder/dependency_management/dependency_manager.py b/src/codemodder/dependency_management/dependency_manager.py index 3e6f2b71..4fe777b8 100644 --- a/src/codemodder/dependency_management/dependency_manager.py +++ b/src/codemodder/dependency_management/dependency_manager.py @@ -1,7 +1,7 @@ from pathlib import Path from typing import Optional -from codemodder.change import ChangeSet +from codemodder.codetf import ChangeSet from codemodder.dependency import Dependency from codemodder.dependency_management.pyproject_writer import PyprojectWriter from codemodder.dependency_management.requirements_txt_writer import ( diff --git a/src/codemodder/dependency_management/pyproject_writer.py b/src/codemodder/dependency_management/pyproject_writer.py index 28441525..f7c767b2 100644 --- a/src/codemodder/dependency_management/pyproject_writer.py +++ b/src/codemodder/dependency_management/pyproject_writer.py @@ -3,7 +3,7 @@ import tomlkit -from codemodder.change import ChangeSet +from codemodder.codetf import ChangeSet from codemodder.dependency import Dependency from codemodder.dependency_management.base_dependency_writer import DependencyWriter from codemodder.diff import create_diff_and_linenums @@ -41,8 +41,8 @@ def add_to_file( dependencies, added_line_nums_strategy, added_line_nums ) return ChangeSet( - str(self.path.relative_to(self.parent_directory)), - diff, + path=str(self.path.relative_to(self.parent_directory)), + diff=diff, changes=changes, ) diff --git a/src/codemodder/dependency_management/requirements_txt_writer.py b/src/codemodder/dependency_management/requirements_txt_writer.py index 5661d204..646bdd93 100644 --- a/src/codemodder/dependency_management/requirements_txt_writer.py +++ b/src/codemodder/dependency_management/requirements_txt_writer.py @@ -1,6 +1,6 @@ from typing import Optional -from codemodder.change import ChangeSet +from codemodder.codetf import ChangeSet from codemodder.dependency import Dependency from codemodder.dependency_management.base_dependency_writer import DependencyWriter from codemodder.diff import create_diff @@ -40,8 +40,8 @@ def add_to_file( dependencies, original_lines_strategy, original_lines ) return ChangeSet( - str(self.path.relative_to(self.parent_directory)), - diff, + path=str(self.path.relative_to(self.parent_directory)), + diff=diff, changes=changes, ) diff --git a/src/codemodder/dependency_management/setup_py_writer.py b/src/codemodder/dependency_management/setup_py_writer.py index 23689935..f9d07920 100644 --- a/src/codemodder/dependency_management/setup_py_writer.py +++ b/src/codemodder/dependency_management/setup_py_writer.py @@ -5,10 +5,10 @@ from libcst.codemod import CodemodContext from packaging.requirements import Requirement -from codemodder.change import ChangeSet from codemodder.codemods.api import Metadata, ReviewGuidance, SimpleCodemod from codemodder.codemods.utils import is_setup_py_file from codemodder.codemods.utils_mixin import NameResolutionMixin +from codemodder.codetf import ChangeSet from codemodder.dependency import Dependency from codemodder.dependency_management.base_dependency_writer import DependencyWriter from codemodder.diff import create_diff_from_tree @@ -48,8 +48,8 @@ def add_to_file( dependencies, fixed_line_number_strategy, codemod.line_num_changed ) return ChangeSet( - str(self.path.relative_to(self.parent_directory)), - diff, + path=str(self.path.relative_to(self.parent_directory)), + diff=diff, changes=changes, ) diff --git a/src/codemodder/dependency_management/setupcfg_writer.py b/src/codemodder/dependency_management/setupcfg_writer.py index 34613212..6d02083a 100644 --- a/src/codemodder/dependency_management/setupcfg_writer.py +++ b/src/codemodder/dependency_management/setupcfg_writer.py @@ -2,7 +2,7 @@ import re from typing import Optional -from codemodder.change import ChangeSet +from codemodder.codetf import ChangeSet from codemodder.dependency import Dependency from codemodder.dependency_management.base_dependency_writer import DependencyWriter from codemodder.diff import create_diff_and_linenums @@ -61,8 +61,8 @@ def add_to_file( dependencies, added_line_nums_strategy, added_line_nums ) return ChangeSet( - str(self.path.relative_to(self.parent_directory)), - diff, + path=str(self.path.relative_to(self.parent_directory)), + diff=diff, changes=changes, ) diff --git a/src/codemodder/file_context.py b/src/codemodder/file_context.py index 2b70de1b..a0aceea8 100644 --- a/src/codemodder/file_context.py +++ b/src/codemodder/file_context.py @@ -1,7 +1,7 @@ from dataclasses import dataclass, field from pathlib import Path -from codemodder.change import Change, ChangeSet +from codemodder.codetf import Change, ChangeSet from codemodder.dependency import Dependency from codemodder.result import Result from codemodder.utils.timer import Timer diff --git a/src/codemodder/report/__init__.py b/src/codemodder/report/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/codemodder/report/codetf_reporter.py b/src/codemodder/report/codetf_reporter.py deleted file mode 100644 index 5ee15e5e..00000000 --- a/src/codemodder/report/codetf_reporter.py +++ /dev/null @@ -1,51 +0,0 @@ -import json -from os.path import abspath - -from codemodder import __version__ -from codemodder.logging import logger - - -def base_report(): - return { - "run": { - "vendor": "pixee", - "tool": "codemodder-python", - "version": __version__, - "sarifs": [], - }, - "results": [], - } - - -def report_default(elapsed_ms, parsed_args, original_args, results_by_codemod): - report = CodeTF() - absolute_path = abspath(parsed_args.directory) - report.generate(elapsed_ms, original_args, absolute_path, results_by_codemod) - report.write_report(parsed_args.output) - - -class CodeTF: - def __init__(self): - self.report = base_report() - - def generate(self, elapsed_ms, original_args, absolute_path, results_by_codemod): - self.report["run"]["elapsed"] = str(elapsed_ms) - # TODO: this shouldn't be necessary, we should just use sys.argv - # I think this is an artifact of using python -m codemodder, which is being deprecated - self.report["run"]["commandLine"] = self._recreate_command(original_args) - self.report["run"]["directory"] = absolute_path - self.report["results"] = results_by_codemod - - def _recreate_command(self, original_args): - return f"codemodder {' '.join(original_args)}" - - def write_report(self, outfile): - try: - with open(outfile, "w", encoding="utf-8") as f: - json.dump(self.report, f) - except Exception: - logger.exception("failed to write report file.") - # Any issues with writing the output file should exit status 2. - return 2 - logger.debug("wrote report to %s", outfile) - return 0 diff --git a/src/core_codemods/file_resource_leak.py b/src/core_codemods/file_resource_leak.py index c55591d6..bc8a8d97 100644 --- a/src/core_codemods/file_resource_leak.py +++ b/src/core_codemods/file_resource_leak.py @@ -11,7 +11,6 @@ ScopeProvider, ) -from codemodder.change import Change from codemodder.codemods.libcst_transformer import ( LibcstResultTransformer, LibcstTransformerPipeline, @@ -22,6 +21,7 @@ NameAndAncestorResolutionMixin, NameResolutionMixin, ) +from codemodder.codetf import Change from codemodder.file_context import FileContext from codemodder.result import Result from core_codemods.api import Metadata, Reference, ReviewGuidance @@ -222,7 +222,10 @@ def _handle_block( if self._is_fixable(original_block, index, named_targets, other_targets): line_number = self.get_metadata(PositionProvider, resource).start.line self.changes.append( - Change(line_number, FileResourceLeakTransformer.change_description) + Change( + lineNumber=line_number, + description=FileResourceLeakTransformer.change_description, + ) ) # grab the index of the last statement with reference to the resource diff --git a/src/core_codemods/fix_assert_tuple.py b/src/core_codemods/fix_assert_tuple.py index 2a054078..8dce30bb 100644 --- a/src/core_codemods/fix_assert_tuple.py +++ b/src/core_codemods/fix_assert_tuple.py @@ -2,12 +2,12 @@ import libcst as cst -from codemodder.change import Change from codemodder.codemods.libcst_transformer import ( LibcstResultTransformer, LibcstTransformerPipeline, ) from codemodder.codemods.utils_mixin import NameResolutionMixin +from codemodder.codetf import Change from core_codemods.api import Metadata, ReviewGuidance from core_codemods.api.core_codemod import CoreCodemod diff --git a/src/core_codemods/order_imports.py b/src/core_codemods/order_imports.py index f26d7c2f..91aae686 100644 --- a/src/core_codemods/order_imports.py +++ b/src/core_codemods/order_imports.py @@ -1,7 +1,6 @@ import libcst as cst from libcst.metadata import PositionProvider -from codemodder.change import Change from codemodder.codemods.transformations.clean_imports import ( GatherTopLevelImportBlocks, OrderImportsBlocksTransform, @@ -41,11 +40,9 @@ def transform_module_impl(self, tree: cst.Module) -> cst.Module: # seemingly redundant, this check makes it possible to return the original tree for i, changed in enumerate(order_transformer.changes): if changed: - line_number = self.node_position( - top_imports_visitor.top_imports_blocks[i][0] - ).start.line - self.file_context.codemod_changes.append( - Change(line_number, self.change_description) + self.add_change( + top_imports_visitor.top_imports_blocks[i][0], + self.change_description, ) return result_tree return tree diff --git a/src/core_codemods/remove_unused_imports.py b/src/core_codemods/remove_unused_imports.py index 67c8ec95..62deac9e 100644 --- a/src/core_codemods/remove_unused_imports.py +++ b/src/core_codemods/remove_unused_imports.py @@ -7,7 +7,6 @@ ScopeProvider, ) -from codemodder.change import Change from codemodder.codemods.check_annotations import is_disabled_by_annotations from codemodder.codemods.transformations.remove_unused_imports import ( RemoveUnusedImportsTransformer, @@ -48,9 +47,7 @@ def transform_module_impl(self, tree: cst.Module) -> cst.Module: self.metadata, # type: ignore messages=self.IGNORE_ANNOTATIONS, ): - self.file_context.codemod_changes.append( - Change(pos.start.line, self.change_description) - ) + self.add_change_from_position(pos, self.change_description) filtered_unused_imports.add((import_alias, importt)) return tree.visit(RemoveUnusedImportsTransformer(filtered_unused_imports)) diff --git a/src/core_codemods/secure_flask_session_config.py b/src/core_codemods/secure_flask_session_config.py index 46b78285..fea58347 100644 --- a/src/core_codemods/secure_flask_session_config.py +++ b/src/core_codemods/secure_flask_session_config.py @@ -3,9 +3,9 @@ from libcst.codemod import Codemod, CodemodContext from libcst.metadata import ParentNodeProvider -from codemodder.change import Change from codemodder.codemods.base_visitor import BaseTransformer from codemodder.codemods.utils_mixin import NameResolutionMixin +from codemodder.codetf import Change from codemodder.file_context import FileContext from codemodder.utils.utils import extract_targets_of_assignment, true_value from core_codemods.api import Metadata, Reference, ReviewGuidance, SimpleCodemod @@ -194,5 +194,8 @@ def _is_config_subscript(self, original_node: cst.Assign): def report_change(self, original_node): line_number = self.lineno_for_node(original_node) self.file_context.codemod_changes.append( - Change(line_number, SecureFlaskSessionConfig.change_description) + Change( + lineNumber=line_number, + description=SecureFlaskSessionConfig.change_description, + ) ) diff --git a/src/core_codemods/sql_parameterization.py b/src/core_codemods/sql_parameterization.py index 401df5b9..7e39c9ac 100644 --- a/src/core_codemods/sql_parameterization.py +++ b/src/core_codemods/sql_parameterization.py @@ -13,7 +13,6 @@ ScopeProvider, ) -from codemodder.change import Change from codemodder.codemods.base_visitor import UtilsMixin from codemodder.codemods.libcst_transformer import ( LibcstResultTransformer, @@ -30,6 +29,7 @@ infer_expression_type, ) from codemodder.codemods.utils_mixin import NameAndAncestorResolutionMixin +from codemodder.codetf import Change from core_codemods.api import Metadata, Reference, ReviewGuidance from core_codemods.api.core_codemod import CoreCodemod @@ -149,8 +149,8 @@ def transform_module_impl(self, tree: cst.Module) -> cst.Module: line_number = self.get_metadata(PositionProvider, call).start.line self.file_context.codemod_changes.append( Change( - line_number, - SQLQueryParameterizationTransformer.change_description, + lineNumber=line_number, + description=SQLQueryParameterizationTransformer.change_description, ) ) # Normalization and cleanup diff --git a/src/core_codemods/url_sandbox.py b/src/core_codemods/url_sandbox.py index 3ac72d94..40ebb3f5 100644 --- a/src/core_codemods/url_sandbox.py +++ b/src/core_codemods/url_sandbox.py @@ -6,12 +6,12 @@ from libcst.codemod.visitors import AddImportsVisitor, ImportItem from libcst.metadata import PositionProvider, ScopeProvider -from codemodder.change import Change from codemodder.codemods.base_visitor import UtilsMixin from codemodder.codemods.transformations.remove_unused_imports import ( RemoveUnusedImportsCodemod, ) from codemodder.codemods.utils import ReplaceNodes +from codemodder.codetf import Change from codemodder.dependency import Security from codemodder.file_context import FileContext from core_codemods.api import Metadata, Reference, ReviewGuidance, SimpleCodemod @@ -137,7 +137,10 @@ def leave_Call(self, original_node: cst.Call): } ) self.changes_in_file.append( - Change(line_number, UrlSandbox.change_description) + Change( + lineNumber=line_number, + description=UrlSandbox.change_description, + ) ) # case req.get(...) @@ -151,7 +154,10 @@ def leave_Call(self, original_node: cst.Call): } ) self.changes_in_file.append( - Change(line_number, UrlSandbox.change_description) + Change( + lineNumber=line_number, + description=UrlSandbox.change_description, + ) ) def _find_assignments(self, node: CSTNode): diff --git a/tests/conftest.py b/tests/conftest.py index ecaeb400..c8d55daa 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,7 +6,7 @@ def disable_write_report(mocker): """ Unit tests should not write analysis report or update any source files. """ - mocker.patch("codemodder.report.codetf_reporter.CodeTF.write_report") + mocker.patch("codemodder.codetf.CodeTF.write_report") @pytest.fixture(autouse=True) @@ -31,7 +31,8 @@ def disable_write_dependencies(mocker): Unit tests should not write any dependency files """ mocker.patch( - "codemodder.dependency_management.dependency_manager.DependencyManager.write" + "codemodder.dependency_management.dependency_manager.DependencyManager.write", + return_value=None, ) diff --git a/tests/dependency_management/test_base_dependency_writer.py b/tests/dependency_management/test_base_dependency_writer.py index 06f433d1..58ce858f 100644 --- a/tests/dependency_management/test_base_dependency_writer.py +++ b/tests/dependency_management/test_base_dependency_writer.py @@ -1,4 +1,4 @@ -from codemodder.change import ChangeSet +from codemodder.codetf import ChangeSet from codemodder.dependency import Dependency, Requirement from codemodder.dependency_management.base_dependency_writer import DependencyWriter from codemodder.project_analysis.file_parsers.package_store import PackageStore diff --git a/tests/dependency_management/test_dependency_manager.py b/tests/dependency_management/test_dependency_manager.py index fee20f61..84ec44e0 100644 --- a/tests/dependency_management/test_dependency_manager.py +++ b/tests/dependency_management/test_dependency_manager.py @@ -1,6 +1,6 @@ import pytest -from codemodder.change import ChangeSet +from codemodder.codetf import ChangeSet from codemodder.dependency import DefusedXML, Security from codemodder.dependency_management import DependencyManager from codemodder.project_analysis.file_parsers import ( diff --git a/tests/dependency_management/test_pyproject_writer.py b/tests/dependency_management/test_pyproject_writer.py index b806f2c9..06c19add 100644 --- a/tests/dependency_management/test_pyproject_writer.py +++ b/tests/dependency_management/test_pyproject_writer.py @@ -2,7 +2,7 @@ import pytest -from codemodder.change import DiffSide +from codemodder.codetf import DiffSide from codemodder.dependency import DefusedXML, Security from codemodder.dependency_management.pyproject_writer import PyprojectWriter from codemodder.project_analysis.file_parsers.package_store import ( diff --git a/tests/dependency_management/test_requirements_txt_writer.py b/tests/dependency_management/test_requirements_txt_writer.py index db1ce993..dd7c68bc 100644 --- a/tests/dependency_management/test_requirements_txt_writer.py +++ b/tests/dependency_management/test_requirements_txt_writer.py @@ -2,7 +2,7 @@ import pytest -from codemodder.change import DiffSide +from codemodder.codetf import DiffSide from codemodder.dependency import DefusedXML, Requirement, Security from codemodder.dependency_management.requirements_txt_writer import ( RequirementsTxtWriter, diff --git a/tests/dependency_management/test_setup_py_writer.py b/tests/dependency_management/test_setup_py_writer.py index a82f0096..d7c06f13 100644 --- a/tests/dependency_management/test_setup_py_writer.py +++ b/tests/dependency_management/test_setup_py_writer.py @@ -2,7 +2,7 @@ import pytest -from codemodder.change import DiffSide +from codemodder.codetf import DiffSide from codemodder.dependency import DefusedXML, Security from codemodder.dependency_management.setup_py_writer import SetupPyWriter from codemodder.project_analysis.file_parsers.package_store import ( diff --git a/tests/dependency_management/test_setupcfgt_writer.py b/tests/dependency_management/test_setupcfgt_writer.py index 7c037980..e4c68227 100644 --- a/tests/dependency_management/test_setupcfgt_writer.py +++ b/tests/dependency_management/test_setupcfgt_writer.py @@ -3,7 +3,7 @@ import mock import pytest -from codemodder.change import DiffSide +from codemodder.codetf import DiffSide from codemodder.dependency import DefusedXML, Security from codemodder.dependency_management.setupcfg_writer import SetupCfgWriter from codemodder.project_analysis.file_parsers.package_store import ( diff --git a/tests/report/__init__.py b/tests/report/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/report/test_codetf_reporter.py b/tests/report/test_codetf_reporter.py deleted file mode 100644 index e128f486..00000000 --- a/tests/report/test_codetf_reporter.py +++ /dev/null @@ -1,31 +0,0 @@ -import json - -import mock -import pytest - -from codemodder.report.codetf_reporter import CodeTF - - -@pytest.fixture(autouse=True, scope="module") -def disable_write_report(): - """Disable write_report mocking so it can be properlly tested""" - patch_open = mock.patch("builtins.open") - patch_open.start() - yield - patch_open.stop() - - -class TestCodeTfReporter: - @mock.patch("json.dump") - def test_write_report(self, mock_dump): - reporter = CodeTF() - res = reporter.write_report("any/file") - assert res == 0 - mock_dump.assert_called() - - @mock.patch("json.dump", side_effect=json.JSONDecodeError) - def test_write_report_fails(self, mock_dump): - reporter = CodeTF() - res = reporter.write_report("any/file") - assert res == 2 - mock_dump.assert_called() diff --git a/tests/test_change.py b/tests/test_change.py deleted file mode 100644 index a1729e8b..00000000 --- a/tests/test_change.py +++ /dev/null @@ -1,39 +0,0 @@ -import pytest - -from codemodder.change import Change, ChangeSet, DiffSide - - -def test_change(): - diff = "--- a/test\n+++ b/test\n@@ -1,1 +1,1 @@\n-1\n+2\n" - changeset = ChangeSet( - path="test", - diff=diff, - changes=[ - Change( - lineNumber=1, - description="Change 1 to 2", - ), - ], - ) - - result = changeset.to_json() - - assert result["path"] == "test" - assert result["diff"] == diff - assert result["changes"][0]["lineNumber"] == str(1) - assert result["changes"][0]["description"] == "Change 1 to 2" - assert result["changes"][0]["diffSide"] == "right" - assert result["changes"][0]["properties"] == {} - assert result["changes"][0]["packageActions"] == [] - - -@pytest.mark.parametrize("side", [DiffSide.LEFT, DiffSide.RIGHT]) -def test_change_diffside(side): - change = Change( - lineNumber=1, - description="Change 1 to 2", - diffSide=side, - ) - - assert change.diffSide == side - assert change.to_json()["diffSide"] == side.value diff --git a/tests/test_codemodder.py b/tests/test_codemodder.py index a2fc56dc..7e3236ac 100644 --- a/tests/test_codemodder.py +++ b/tests/test_codemodder.py @@ -35,8 +35,8 @@ def test_no_files_matched(self, mock_parse, tmpdir): assert codetf.exists() @mock.patch("libcst.parse_module", side_effect=Exception) - @mock.patch("codemodder.codemodder.report_default") - def test_cst_parsing_fails(self, mock_reporting, mock_parse): + @mock.patch("codemodder.codetf.CodeTF.build") + def test_cst_parsing_fails(self, build_report, mock_parse): args = [ "tests/samples/", "--output", @@ -52,19 +52,21 @@ def test_cst_parsing_fails(self, mock_reporting, mock_parse): assert res == 0 mock_parse.assert_called() - mock_reporting.assert_called_once() - args_to_reporting = mock_reporting.call_args_list[0][0] + build_report.assert_called_once() + args_to_reporting = build_report.call_args_list[0][0] assert len(args_to_reporting) == 4 _, _, _, results_by_codemod = args_to_reporting assert results_by_codemod != [] requests_report = results_by_codemod[0] - assert requests_report["changeset"] == [] - assert len(requests_report["failedFiles"]) == 1 - assert sorted(requests_report["failedFiles"]) == [ + assert requests_report.changeset == [] + assert len(requests_report.failedFiles) == 1 + assert sorted(requests_report.failedFiles) == [ "tests/samples/unverified_request.py", ] + build_report.return_value.write_report.assert_called_once() + @mock.patch("codemodder.codemods.libcst_transformer.update_code") @mock.patch("codemodder.codemods.semgrep.semgrep_run", side_effect=semgrep_run) def test_dry_run(self, _, mock_update_code, tmpdir): @@ -87,7 +89,7 @@ def test_dry_run(self, _, mock_update_code, tmpdir): mock_update_code.assert_not_called() @pytest.mark.parametrize("dry_run", [True, False]) - @mock.patch("codemodder.codemodder.report_default") + @mock.patch("codemodder.codetf.CodeTF.build") def test_reporting(self, mock_reporting, dry_run): args = [ "tests/samples/", @@ -109,6 +111,8 @@ def test_reporting(self, mock_reporting, dry_run): assert len(results_by_codemod) == 3 + mock_reporting.return_value.write_report.assert_called_once() + @mock.patch("codemodder.codemods.semgrep.semgrep_run") def test_no_codemods_to_run(self, mock_semgrep_run, tmpdir): codetf = tmpdir / "result.codetf" @@ -130,11 +134,9 @@ def test_no_codemods_to_run(self, mock_semgrep_run, tmpdir): @pytest.mark.parametrize("codemod", ["secure-random", "pixee:python/secure-random"]) @mock.patch("codemodder.context.CodemodExecutionContext.compile_results") - @mock.patch("codemodder.codemodder.report_default") - def test_run_codemod_name_or_id( - self, report_default, mock_compile_results, codemod - ): - del report_default + @mock.patch("codemodder.codetf.CodeTF.write_report") + def test_run_codemod_name_or_id(self, write_report, mock_compile_results, codemod): + del write_report args = [ "tests/samples/", "--output", @@ -148,7 +150,7 @@ def test_run_codemod_name_or_id( class TestExitCode: - @mock.patch("codemodder.codemodder.report_default") + @mock.patch("codemodder.codetf.CodeTF.write_report") def test_success_0(self, mock_report): del mock_report args = [ @@ -163,7 +165,7 @@ def test_success_0(self, mock_report): exit_code = run(args) assert exit_code == 0 - @mock.patch("codemodder.codemodder.report_default") + @mock.patch("codemodder.codetf.CodeTF.write_report") def test_bad_project_dir_1(self, mock_report): del mock_report args = [ @@ -176,7 +178,7 @@ def test_bad_project_dir_1(self, mock_report): exit_code = run(args) assert exit_code == 1 - @mock.patch("codemodder.codemodder.report_default") + @mock.patch("codemodder.codetf.CodeTF.write_report") def test_conflicting_include_exclude(self, mock_report): del mock_report args = [ @@ -192,7 +194,7 @@ def test_conflicting_include_exclude(self, mock_report): run(args) assert err.value.args[0] == 3 - @mock.patch("codemodder.codemodder.report_default") + @mock.patch("codemodder.codetf.CodeTF.write_report") def test_bad_codemod_name(self, mock_report): del mock_report bad_codemod = "doesntexist" diff --git a/tests/test_codetf.py b/tests/test_codetf.py new file mode 100644 index 00000000..7f2acc5f --- /dev/null +++ b/tests/test_codetf.py @@ -0,0 +1,113 @@ +import json +from pathlib import Path + +import jsonschema +import pytest +import requests + +from codemodder.codetf import Change, ChangeSet, CodeTF, DiffSide, Result + + +@pytest.fixture(autouse=True) +def disable_write_report(): + """ + Override conftest to enable results to be written to disk for these tests. + """ + + +@pytest.fixture(autouse=True, scope="module") +def codetf_schema(): + schema_path = "https://raw.githubusercontent.com/pixee/codemodder-specs/main/codetf.schema.json" + response = requests.get(schema_path) + yield json.loads(response.text) + + +def test_change(): + diff = "--- a/test\n+++ b/test\n@@ -1,1 +1,1 @@\n-1\n+2\n" + changeset = ChangeSet( + path="test", + diff=diff, + changes=[ + Change( + lineNumber=1, + description="Change 1 to 2", + ), + ], + ) + + result = changeset.model_dump() + + assert result["path"] == "test" + assert result["diff"] == diff + assert result["changes"][0]["lineNumber"] == 1 + assert result["changes"][0]["description"] == "Change 1 to 2" + assert result["changes"][0]["diffSide"] == DiffSide.RIGHT + assert result["changes"][0]["properties"] is None + assert result["changes"][0]["packageActions"] is None + + +@pytest.mark.parametrize("side", [DiffSide.LEFT, DiffSide.RIGHT]) +def test_change_diffside(side): + change = Change( + lineNumber=1, + description="Change 1 to 2", + diffSide=side, + ) + + assert change.diffSide == side + assert change.model_dump()["diffSide"] == side + + +def test_write_codetf(tmpdir, mocker, codetf_schema): + path = tmpdir / "test.codetf.json" + + assert not path.exists() + + context = mocker.MagicMock(directory=Path("/foo/bar/whatever")) + codetf = CodeTF.build(context, 42, [], []) + retval = codetf.write_report(path) + + assert retval == 0 + assert path.exists() + + data = path.read_text(encoding="utf-8") + CodeTF.model_validate_json(data) + + jsonschema.validate(json.loads(data), codetf_schema) + + +def test_write_codetf_with_results(tmpdir, mocker, codetf_schema): + path = tmpdir / "test.codetf.json" + + assert not path.exists() + + context = mocker.MagicMock(directory=Path("/foo/bar/whatever")) + results = [ + Result( + codemod="test", + summary="test", + description="test", + changeset=[ + ChangeSet( + path="test", + diff="--- a/test\n+++ b/test\n@@ -1,1 +1,1 @@\n-1\n+2\n", + changes=[ + Change( + lineNumber=1, + description="Change 1 to 2", + ), + ], + ), + ], + ) + ] + codetf = CodeTF.build(context, 42, [], results) + retval = codetf.write_report(path) + + assert retval == 0 + assert path.exists() + + data = path.read_text(encoding="utf-8") + CodeTF.model_validate_json(data) + + jsonschema.validate(json.loads(data), codetf_schema) diff --git a/tests/test_context.py b/tests/test_context.py index 59135dd0..2238afb8 100644 --- a/tests/test_context.py +++ b/tests/test_context.py @@ -1,9 +1,23 @@ +import pytest + from codemodder.context import CodemodExecutionContext as Context from codemodder.dependency import Security from codemodder.project_analysis.python_repo_manager import PythonRepoManager from codemodder.registry import load_registered_codemods +@pytest.fixture(autouse=True) +def disable_write_dependencies(mocker): + """ + Unit tests should not write any dependency files + """ + mocker.patch( + "codemodder.dependency_management.dependency_manager.DependencyManager.write", + # In conftest.py the return value is set to None + # We need a mocked value for the tests in this file + ) + + class TestContext: def test_successful_dependency_description(self, mocker): registry = load_registered_codemods()