Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Pydantic to generate and validate CodeTF data models #357

Merged
merged 3 commits into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ repos:
args: [--disable-error-code=has-type,--disable-error-code=import-not-found]
additional_dependencies:
[
"types-jsonschema~=4.21.0",
"types-mock==5.0.*",
"types-PyYAML==6.0",
"types-toml~=0.10",
Expand Down
11 changes: 11 additions & 0 deletions integration_tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import json

import pytest
import requests


@pytest.fixture(scope="module")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This new fixture means every time a test module runs we're making a request. I see this is also defined below, so now both unit tests and integration tests rely on internet connectivity. Just pointing that out.
Do we need it per module, or could this fixture run just once per test session?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@clavedeluna good catch. I did think about making it session scoped but I think we're running unit tests and integration tests in separate workflows so it would run twice anyway.

def codetf_schema():
    """Download the CodeTF JSON schema and yield it as a parsed object.

    The schema is fetched over HTTP from the ``codemodder-specs`` repository
    each time this generator is started, so callers need network access.

    Yields:
        The decoded JSON document for ``codetf.schema.json``.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not respond within the timeout.
    """
    schema_path = "https://raw.githubusercontent.com/pixee/codemodder-specs/main/codetf.schema.json"
    # Bound the request: without a timeout a stalled connection would hang
    # the whole test run indefinitely.
    response = requests.get(schema_path, timeout=30)
    # Surface HTTP failures directly instead of letting an error page reach
    # the JSON decoder and fail with a misleading message.
    response.raise_for_status()
    yield json.loads(response.text)
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies = [
"isort>=5.12,<5.14",
"libcst>=1.1,<1.3",
"packaging>=23.2,<25.0",
"pydantic~=2.6.0",
"pylint>=3.0,<3.2",
"python-json-logger~=2.0.0",
"PyYAML~=6.0.0",
Expand Down Expand Up @@ -50,6 +51,7 @@ test = [
"coverage>=7.3,<7.5",
"Flask<4",
"Jinja2~=3.1.2",
"jsonschema~=4.21.0",
"lxml>=4.9.3,<5.2.0",
"mock==5.1.*",
"pre-commit<4",
Expand All @@ -59,6 +61,7 @@ test = [
"pytest-mock~=3.12.0",
"pytest-randomly==3.*",
"pytest-xdist==3.*",
"requests~=2.31.0",
"security~=1.2.0",
"types-mock==5.1.*",
"django>=4,<6",
Expand Down
71 changes: 0 additions & 71 deletions src/codemodder/change.py

This file was deleted.

12 changes: 8 additions & 4 deletions src/codemodder/codemodder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
from codemodder.code_directory import match_files
from codemodder.codemods.api import BaseCodemod
from codemodder.codemods.semgrep import SemgrepRuleDetector
from codemodder.codetf import CodeTF
from codemodder.context import CodemodExecutionContext
from codemodder.dependency import Dependency
from codemodder.logging import configure_logger, log_list, log_section, logger
from codemodder.project_analysis.file_parsers.package_store import PackageStore
from codemodder.project_analysis.python_repo_manager import PythonRepoManager
from codemodder.report.codetf_reporter import report_default
from codemodder.result import ResultSet
from codemodder.sarifs import detect_sarif_tools
from codemodder.semgrep import run as run_semgrep
Expand Down Expand Up @@ -210,13 +210,17 @@ def run(original_args) -> int:
files_to_analyze,
)

results = context.compile_results(codemods_to_run)

elapsed = datetime.datetime.now() - start
elapsed_ms = int(elapsed.total_seconds() * 1000)

if argv.output:
report_default(elapsed_ms, argv, original_args, results)
codetf = CodeTF.build(
context,
elapsed_ms,
original_args,
context.compile_results(codemods_to_run),
)
codetf.write_report(argv.output)

log_report(context, argv, elapsed_ms, files_to_analyze)
return 0
Expand Down
15 changes: 2 additions & 13 deletions src/codemodder/codemods/base_codemod.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from codemodder.code_directory import file_line_patterns
from codemodder.codemods.base_detector import BaseDetector
from codemodder.codemods.base_transformer import BaseTransformerPipeline
from codemodder.codetf import Reference
from codemodder.context import CodemodExecutionContext
from codemodder.file_context import FileContext
from codemodder.logging import logger
Expand All @@ -23,18 +24,6 @@ class ReviewGuidance(Enum):
MERGE_WITHOUT_REVIEW = 3


@dataclass
class Reference:
url: str
description: str = ""

def to_json(self):
return {
"url": self.url,
"description": self.description or self.url,
}


@dataclass
class Metadata:
name: str
Expand Down Expand Up @@ -124,7 +113,7 @@ def describe(self):
"codemod": self.id,
"summary": self.summary,
"description": self.description,
"references": [ref.to_json() for ref in self.references],
"references": [ref.model_dump() for ref in self.references],
}

def _apply(
Expand Down
2 changes: 1 addition & 1 deletion src/codemodder/codemods/base_transformer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from abc import ABCMeta, abstractmethod

from codemodder.change import ChangeSet
from codemodder.codemods.base_visitor import BaseTransformer
from codemodder.codetf import ChangeSet
from codemodder.context import CodemodExecutionContext
from codemodder.file_context import FileContext
from codemodder.result import Result
Expand Down
4 changes: 2 additions & 2 deletions src/codemodder/codemods/imported_call_modifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
from libcst.codemod import CodemodContext, VisitorBasedCodemodCommand
from libcst.metadata import PositionProvider

from codemodder.change import Change
from codemodder.codemods.base_visitor import UtilsMixin
from codemodder.codemods.utils_mixin import NameResolutionMixin
from codemodder.codetf import Change
from codemodder.file_context import FileContext
from codemodder.result import Result

Expand Down Expand Up @@ -75,7 +75,7 @@ def leave_Call(self, original_node: cst.Call, updated_node: cst.Call):
and true_name in self.matching_functions
):
self.changes_in_file.append(
Change(line_number, self.change_description)
Change(lineNumber=line_number, description=self.change_description)
)

new_args = self.updated_args(updated_node.args)
Expand Down
11 changes: 6 additions & 5 deletions src/codemodder/codemods/libcst_transformer.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from collections import namedtuple
from typing import cast

import libcst as cst
from libcst import matchers
from libcst._position import CodeRange
from libcst.codemod import CodemodContext
from libcst.codemod.visitors import AddImportsVisitor, RemoveImportsVisitor

from codemodder.change import Change, ChangeSet
from codemodder.codemods.base_transformer import BaseTransformerPipeline
from codemodder.codemods.base_visitor import BaseTransformer
from codemodder.codemods.utils import get_call_name
from codemodder.codetf import Change, ChangeSet
from codemodder.context import CodemodExecutionContext
from codemodder.dependency import Dependency
from codemodder.diff import create_diff_from_tree
Expand Down Expand Up @@ -99,7 +100,7 @@ def leave_ClassDef(

def node_position(self, node):
# See https://github.com/Instagram/LibCST/blob/main/libcst/_metadata_dependent.py#L112
return self.get_metadata(self.METADATA_DEPENDENCIES[0], node)
return cast(CodeRange, self.get_metadata(self.METADATA_DEPENDENCIES[0], node))

def add_change(self, node, description: str, start: bool = True):
position = self.node_position(node)
Expand All @@ -125,7 +126,7 @@ def add_dependency(self, dependency: Dependency):
def report_change(self, original_node):
line_number = self.lineno_for_node(original_node)
self.file_context.codemod_changes.append(
Change(line_number, self.change_description)
Change(lineNumber=line_number, description=self.change_description)
)

def remove_unused_import(self, original_node):
Expand Down Expand Up @@ -275,8 +276,8 @@ def apply(
return None

change_set = ChangeSet(
str(file_context.file_path.relative_to(context.directory)),
diff,
path=str(file_context.file_path.relative_to(context.directory)),
diff=diff,
changes=file_context.codemod_changes,
)

Expand Down
16 changes: 9 additions & 7 deletions src/codemodder/codemods/test/integration_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from types import ModuleType

import git
import jsonschema

from codemodder import __version__, registry

Expand Down Expand Up @@ -99,7 +100,8 @@ def _assert_results_fields(self, results, output_path):
result = results[0]
assert result["codemod"] == self.codemod_instance.id
assert result["references"] == [
ref.to_json() for ref in self.codemod_instance.references
ref.model_dump(exclude_none=True)
for ref in self.codemod_instance.references
]

# TODO: once we add description for each url.
Expand All @@ -118,15 +120,15 @@ def _assert_results_fields(self, results, output_path):

assert len(change["changes"]) == self.num_changes
line_change = change["changes"][0]
assert line_change["lineNumber"] == str(self.expected_line_change)
assert line_change["lineNumber"] == int(self.expected_line_change)
assert line_change["description"] == self.change_description
assert line_change["packageActions"] == []
assert line_change["properties"] == {}

def _assert_codetf_output(self):
def _assert_codetf_output(self, codetf_schema):
with open(self.output_path, "r", encoding="utf-8") as f:
codetf = json.load(f)

jsonschema.validate(codetf, codetf_schema)

assert sorted(codetf.keys()) == ["results", "run"]
run = codetf["run"]
self._assert_run_fields(run, self.output_path)
Expand All @@ -149,7 +151,7 @@ def check_code_after(self) -> ModuleType:
path=self.code_path, allowed_exceptions=self.allowed_exceptions
)

def test_file_rewritten(self):
def test_file_rewritten(self, codetf_schema):
"""
Tests that file is re-written correctly with new code and correct codetf output.

Expand Down Expand Up @@ -183,7 +185,7 @@ def test_file_rewritten(self):

self.check_code_after()
self.check_dependencies_after()
self._assert_codetf_output()
self._assert_codetf_output(codetf_schema)
pathlib.Path(self.output_path).unlink(missing_ok=True)
self._run_idempotency_chec(command)

Expand Down
Loading
Loading