Skip to content

Commit

Permalink
Use Pydantic to generate and validate CodeTF data models (#357)
Browse files Browse the repository at this point in the history
* Implement CodeTF data model using Pydantic

* Rename codemodder.change => codemodder.codetf

* Implement JSON schema validation for CodeTF results

---------

Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com>
  • Loading branch information
drdavella and pixeebot[bot] authored Mar 12, 2024
1 parent 9a46cce commit 658bc91
Show file tree
Hide file tree
Showing 41 changed files with 396 additions and 301 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ repos:
args: [--disable-error-code=has-type,--disable-error-code=import-not-found]
additional_dependencies:
[
"types-jsonschema~=4.21.0",
"types-mock==5.0.*",
"types-PyYAML==6.0",
"types-toml~=0.10",
Expand Down
11 changes: 11 additions & 0 deletions integration_tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import json

import pytest
import requests


@pytest.fixture(scope="module")
def codetf_schema():
    """Download the published CodeTF JSON schema once per test module.

    Yields:
        The schema document parsed from the JSON served by the
        ``pixee/codemodder-specs`` repository.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not respond in time.
    """
    schema_path = "https://raw.githubusercontent.com/pixee/codemodder-specs/main/codetf.schema.json"
    # Without a timeout ``requests`` may block forever and hang the test run.
    response = requests.get(schema_path, timeout=60)
    # Fail right here with a clear HTTP error rather than letting an error
    # page reach the JSON parser (or be used as a bogus schema later on).
    response.raise_for_status()
    yield json.loads(response.text)
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies = [
"isort>=5.12,<5.14",
"libcst>=1.1,<1.3",
"packaging>=23.2,<25.0",
"pydantic~=2.6.0",
"pylint>=3.0,<3.2",
"python-json-logger~=2.0.0",
"PyYAML~=6.0.0",
Expand Down Expand Up @@ -50,6 +51,7 @@ test = [
"coverage>=7.3,<7.5",
"Flask<4",
"Jinja2~=3.1.2",
"jsonschema~=4.21.0",
"lxml>=4.9.3,<5.2.0",
"mock==5.1.*",
"pre-commit<4",
Expand All @@ -59,6 +61,7 @@ test = [
"pytest-mock~=3.12.0",
"pytest-randomly==3.*",
"pytest-xdist==3.*",
"requests~=2.31.0",
"security~=1.2.0",
"types-mock==5.1.*",
"django>=4,<6",
Expand Down
71 changes: 0 additions & 71 deletions src/codemodder/change.py

This file was deleted.

12 changes: 8 additions & 4 deletions src/codemodder/codemodder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
from codemodder.code_directory import match_files
from codemodder.codemods.api import BaseCodemod
from codemodder.codemods.semgrep import SemgrepRuleDetector
from codemodder.codetf import CodeTF
from codemodder.context import CodemodExecutionContext
from codemodder.dependency import Dependency
from codemodder.logging import configure_logger, log_list, log_section, logger
from codemodder.project_analysis.file_parsers.package_store import PackageStore
from codemodder.project_analysis.python_repo_manager import PythonRepoManager
from codemodder.report.codetf_reporter import report_default
from codemodder.result import ResultSet
from codemodder.sarifs import detect_sarif_tools
from codemodder.semgrep import run as run_semgrep
Expand Down Expand Up @@ -210,13 +210,17 @@ def run(original_args) -> int:
files_to_analyze,
)

results = context.compile_results(codemods_to_run)

elapsed = datetime.datetime.now() - start
elapsed_ms = int(elapsed.total_seconds() * 1000)

if argv.output:
report_default(elapsed_ms, argv, original_args, results)
codetf = CodeTF.build(
context,
elapsed_ms,
original_args,
context.compile_results(codemods_to_run),
)
codetf.write_report(argv.output)

log_report(context, argv, elapsed_ms, files_to_analyze)
return 0
Expand Down
15 changes: 2 additions & 13 deletions src/codemodder/codemods/base_codemod.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from codemodder.code_directory import file_line_patterns
from codemodder.codemods.base_detector import BaseDetector
from codemodder.codemods.base_transformer import BaseTransformerPipeline
from codemodder.codetf import Reference
from codemodder.context import CodemodExecutionContext
from codemodder.file_context import FileContext
from codemodder.logging import logger
Expand All @@ -23,18 +24,6 @@ class ReviewGuidance(Enum):
MERGE_WITHOUT_REVIEW = 3


@dataclass
class Reference:
url: str
description: str = ""

def to_json(self):
return {
"url": self.url,
"description": self.description or self.url,
}


@dataclass
class Metadata:
name: str
Expand Down Expand Up @@ -124,7 +113,7 @@ def describe(self):
"codemod": self.id,
"summary": self.summary,
"description": self.description,
"references": [ref.to_json() for ref in self.references],
"references": [ref.model_dump() for ref in self.references],
}

def _apply(
Expand Down
2 changes: 1 addition & 1 deletion src/codemodder/codemods/base_transformer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from abc import ABCMeta, abstractmethod

from codemodder.change import ChangeSet
from codemodder.codemods.base_visitor import BaseTransformer
from codemodder.codetf import ChangeSet
from codemodder.context import CodemodExecutionContext
from codemodder.file_context import FileContext
from codemodder.result import Result
Expand Down
4 changes: 2 additions & 2 deletions src/codemodder/codemods/imported_call_modifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
from libcst.codemod import CodemodContext, VisitorBasedCodemodCommand
from libcst.metadata import PositionProvider

from codemodder.change import Change
from codemodder.codemods.base_visitor import UtilsMixin
from codemodder.codemods.utils_mixin import NameResolutionMixin
from codemodder.codetf import Change
from codemodder.file_context import FileContext
from codemodder.result import Result

Expand Down Expand Up @@ -75,7 +75,7 @@ def leave_Call(self, original_node: cst.Call, updated_node: cst.Call):
and true_name in self.matching_functions
):
self.changes_in_file.append(
Change(line_number, self.change_description)
Change(lineNumber=line_number, description=self.change_description)
)

new_args = self.updated_args(updated_node.args)
Expand Down
11 changes: 6 additions & 5 deletions src/codemodder/codemods/libcst_transformer.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from collections import namedtuple
from typing import cast

import libcst as cst
from libcst import matchers
from libcst._position import CodeRange
from libcst.codemod import CodemodContext
from libcst.codemod.visitors import AddImportsVisitor, RemoveImportsVisitor

from codemodder.change import Change, ChangeSet
from codemodder.codemods.base_transformer import BaseTransformerPipeline
from codemodder.codemods.base_visitor import BaseTransformer
from codemodder.codemods.utils import get_call_name
from codemodder.codetf import Change, ChangeSet
from codemodder.context import CodemodExecutionContext
from codemodder.dependency import Dependency
from codemodder.diff import create_diff_from_tree
Expand Down Expand Up @@ -99,7 +100,7 @@ def leave_ClassDef(

def node_position(self, node):
# See https://github.com/Instagram/LibCST/blob/main/libcst/_metadata_dependent.py#L112
return self.get_metadata(self.METADATA_DEPENDENCIES[0], node)
return cast(CodeRange, self.get_metadata(self.METADATA_DEPENDENCIES[0], node))

def add_change(self, node, description: str, start: bool = True):
position = self.node_position(node)
Expand All @@ -125,7 +126,7 @@ def add_dependency(self, dependency: Dependency):
def report_change(self, original_node):
line_number = self.lineno_for_node(original_node)
self.file_context.codemod_changes.append(
Change(line_number, self.change_description)
Change(lineNumber=line_number, description=self.change_description)
)

def remove_unused_import(self, original_node):
Expand Down Expand Up @@ -275,8 +276,8 @@ def apply(
return None

change_set = ChangeSet(
str(file_context.file_path.relative_to(context.directory)),
diff,
path=str(file_context.file_path.relative_to(context.directory)),
diff=diff,
changes=file_context.codemod_changes,
)

Expand Down
16 changes: 9 additions & 7 deletions src/codemodder/codemods/test/integration_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from types import ModuleType

import git
import jsonschema

from codemodder import __version__, registry

Expand Down Expand Up @@ -99,7 +100,8 @@ def _assert_results_fields(self, results, output_path):
result = results[0]
assert result["codemod"] == self.codemod_instance.id
assert result["references"] == [
ref.to_json() for ref in self.codemod_instance.references
ref.model_dump(exclude_none=True)
for ref in self.codemod_instance.references
]

# TODO: once we add description for each url.
Expand All @@ -118,15 +120,15 @@ def _assert_results_fields(self, results, output_path):

assert len(change["changes"]) == self.num_changes
line_change = change["changes"][0]
assert line_change["lineNumber"] == str(self.expected_line_change)
assert line_change["lineNumber"] == int(self.expected_line_change)
assert line_change["description"] == self.change_description
assert line_change["packageActions"] == []
assert line_change["properties"] == {}

def _assert_codetf_output(self):
def _assert_codetf_output(self, codetf_schema):
with open(self.output_path, "r", encoding="utf-8") as f:
codetf = json.load(f)

jsonschema.validate(codetf, codetf_schema)

assert sorted(codetf.keys()) == ["results", "run"]
run = codetf["run"]
self._assert_run_fields(run, self.output_path)
Expand All @@ -149,7 +151,7 @@ def check_code_after(self) -> ModuleType:
path=self.code_path, allowed_exceptions=self.allowed_exceptions
)

def test_file_rewritten(self):
def test_file_rewritten(self, codetf_schema):
"""
Tests that file is re-written correctly with new code and correct codetf output.
Expand Down Expand Up @@ -183,7 +185,7 @@ def test_file_rewritten(self):

self.check_code_after()
self.check_dependencies_after()
self._assert_codetf_output()
self._assert_codetf_output(codetf_schema)
pathlib.Path(self.output_path).unlink(missing_ok=True)
self._run_idempotency_chec(command)

Expand Down
Loading

0 comments on commit 658bc91

Please sign in to comment.