diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 47e59b6b..f7e991ff 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,4 +33,5 @@ repos: [ "types-mock==5.0.*", "types-PyYAML==6.0", + "types-toml~=0.10", ] diff --git a/pyproject.toml b/pyproject.toml index 0e98d0b9..5536340f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ "python-json-logger~=2.0.0", "PyYAML~=6.0.0", "semgrep~=1.46.0", + "toml~=0.10.2", "wrapt~=1.15.0", ] diff --git a/src/codemodder/codemodder.py b/src/codemodder/codemodder.py index 69603d3a..00852a3f 100644 --- a/src/codemodder/codemodder.py +++ b/src/codemodder/codemodder.py @@ -16,6 +16,7 @@ from codemodder.code_directory import file_line_patterns, match_files from codemodder.context import CodemodExecutionContext from codemodder.executor import CodemodExecutorWrapper +from codemodder.project_analysis.python_repo_manager import PythonRepoManager from codemodder.report.codetf_reporter import report_default @@ -130,12 +131,16 @@ def run(original_args) -> int: log_section("startup") logger.info("codemodder: python/%s", __VERSION__) + repo_manager = PythonRepoManager(Path(argv.directory)) context = CodemodExecutionContext( Path(argv.directory), argv.dry_run, argv.verbose, codemod_registry, + repo_manager, ) + # todo: enable when ready + # repo_manager.package_stores # TODO: this should be a method of CodemodExecutionContext codemods_to_run = codemod_registry.match_codemods( diff --git a/src/codemodder/context.py b/src/codemodder/context.py index 07ddec6e..3f97f89a 100644 --- a/src/codemodder/context.py +++ b/src/codemodder/context.py @@ -9,6 +9,7 @@ from codemodder.executor import CodemodExecutorWrapper from codemodder.logging import logger, log_list from codemodder.registry import CodemodRegistry +from codemodder.project_analysis.python_repo_manager import PythonRepoManager DEPENDENCY_NOTIFICATION = """``` @@ -32,6 +33,7 @@ class CodemodExecutionContext: # pylint: 
disable=too-many-instance-attributes dry_run: bool = False verbose: bool = False registry: CodemodRegistry + repo_manager: PythonRepoManager def __init__( self, @@ -39,7 +41,8 @@ def __init__( dry_run: bool, verbose: bool, registry: CodemodRegistry, - ): + repo_manager: PythonRepoManager, + ): # pylint: disable=too-many-arguments self.directory = directory self.dry_run = dry_run self.verbose = verbose @@ -47,6 +50,7 @@ def __init__( self._failures_by_codemod = {} self.dependencies = {} self.registry = registry + self.repo_manager = repo_manager def add_result(self, codemod_name, change_set): self._results_by_codemod.setdefault(codemod_name, []).append(change_set) diff --git a/src/codemodder/dependency_manager.py b/src/codemodder/dependency_manager.py index 94810f5a..b83d224c 100644 --- a/src/codemodder/dependency_manager.py +++ b/src/codemodder/dependency_manager.py @@ -15,7 +15,6 @@ class DependencyManager: _new_requirements: list[Dependency] def __init__(self, parent_directory: Path): - """One-time class initialization.""" self.parent_directory = parent_directory self.dependency_file_changed = False self._lines = [] diff --git a/src/codemodder/project_analysis/__init__.py b/src/codemodder/project_analysis/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/codemodder/project_analysis/file_parsers/__init__.py b/src/codemodder/project_analysis/file_parsers/__init__.py new file mode 100644 index 00000000..719ca7eb --- /dev/null +++ b/src/codemodder/project_analysis/file_parsers/__init__.py @@ -0,0 +1,4 @@ +from .requirements_txt_file_parser import RequirementsTxtParser +from .pyproject_toml_file_parser import PyprojectTomlParser +from .setup_cfg_file_parser import SetupCfgParser +from .setup_py_file_parser import SetupPyParser diff --git a/src/codemodder/project_analysis/file_parsers/base_parser.py b/src/codemodder/project_analysis/file_parsers/base_parser.py new file mode 100644 index 00000000..0a8e1bc4 --- /dev/null +++ 
b/src/codemodder/project_analysis/file_parsers/base_parser.py
@@ -0,0 +1,54 @@
+from abc import ABC, abstractmethod
+
+from pathlib import Path
+import re
+from typing import List
+from .package_store import PackageStore
+from packaging.requirements import Requirement
+
+# pip's rule for requirements files: "#" starts a comment only at the
+# beginning of a line or after whitespace, so URL fragments are kept.
+_COMMENT = re.compile(r"(^|\s+)#.*$")
+
+
+class BaseParser(ABC):
+    """Locate and parse every file named `file_name` under a directory."""
+
+    def __init__(self, parent_directory: Path):
+        self.parent_directory = parent_directory
+
+    @property
+    @abstractmethod
+    def file_name(self):
+        ...  # pragma: no cover
+
+    def _parse_dependencies(self, dependencies: List[str]):
+        """
+        Build a `Requirement` for each requirement specifier in `dependencies`.
+
+        Blank entries, comments (whole-line or trailing), line-continuation
+        backslashes and pip option lines such as `-r base.txt` or
+        `--hash=...` are dropped: `Requirement` only accepts PEP 508
+        specifiers and raises `InvalidRequirement` for anything else.
+        """
+        requirements = []
+        for raw in dependencies:
+            line = _COMMENT.sub("", raw).strip().rstrip("\\").rstrip()
+            if line and not line.startswith("-"):
+                requirements.append(Requirement(line))
+        return requirements
+
+    @abstractmethod
+    def _parse_file(self, file: Path):
+        ...  # pragma: no cover
+
+    def find_file_locations(self) -> List[Path]:
+        """Return every matching file below `parent_directory`, sorted."""
+        # rglob makes no ordering guarantee; sort for a stable result
+        return sorted(Path(self.parent_directory).rglob(self.file_name))
+
+    def parse(self) -> list[PackageStore]:
+        """
+        Find 0 or more project config or dependency files within a project repo.
+        """
+        return [self._parse_file(file) for file in self.find_file_locations()]
diff --git a/src/codemodder/project_analysis/file_parsers/package_store.py b/src/codemodder/project_analysis/file_parsers/package_store.py
new file mode 100644
index 00000000..ce39c760
--- /dev/null
+++ b/src/codemodder/project_analysis/file_parsers/package_store.py
@@ -0,0 +1,16 @@
+from dataclasses import dataclass
+from packaging.requirements import Requirement
+
+
+@dataclass
+class PackageStore:
+    """Dependency information extracted from one project file."""
+
+    # name of the kind of file that was parsed, e.g. "requirements.txt"
+    type: str
+    # path of the parsed file
+    file: str
+    # requirements declared by the file
+    dependencies: list[Requirement]
+    # python version specifiers declared by the file, if any
+    py_versions: list[str]
diff --git a/src/codemodder/project_analysis/file_parsers/pyproject_toml_file_parser.py b/src/codemodder/project_analysis/file_parsers/pyproject_toml_file_parser.py
new file mode 100644
index 00000000..8cb5c60e
--- /dev/null
+++ b/src/codemodder/project_analysis/file_parsers/pyproject_toml_file_parser.py
@@ -0,0 +1,33 @@
+from 
codemodder.project_analysis.file_parsers.package_store import PackageStore
+from pathlib import Path
+import toml
+
+from .base_parser import BaseParser
+
+
+class PyprojectTomlParser(BaseParser):
+    @property
+    def file_name(self):
+        return "pyproject.toml"
+
+    def _parse_dependencies_from_toml(self, toml_data: dict):
+        # [project] and its "dependencies" key are both optional
+        dependencies = toml_data.get("project", {}).get("dependencies", [])
+        return self._parse_dependencies(dependencies)
+
+    def _parse_py_versions(self, toml_data: dict):
+        # todo: handle multiple specifiers such as ">=3.11.1,<3.11.2"
+        # "requires-python" is optional: report no versions when absent
+        requires_python = toml_data.get("project", {}).get("requires-python")
+        return [requires_python] if requires_python else []
+
+    def _parse_file(self, file: Path):
+        # a file without a [project] table produces an empty store
+        data = toml.load(file)
+
+        return PackageStore(
+            type=self.file_name,
+            file=str(file),
+            dependencies=self._parse_dependencies_from_toml(data),
+            py_versions=self._parse_py_versions(data),
+        )
diff --git a/src/codemodder/project_analysis/file_parsers/requirements_txt_file_parser.py b/src/codemodder/project_analysis/file_parsers/requirements_txt_file_parser.py
new file mode 100644
index 00000000..4fa69041
--- /dev/null
+++ b/src/codemodder/project_analysis/file_parsers/requirements_txt_file_parser.py
@@ -0,0 +1,25 @@
+from codemodder.project_analysis.file_parsers.package_store import PackageStore
+from pathlib import Path
+from .base_parser import BaseParser
+
+
+class RequirementsTxtParser(BaseParser):
+    """Parser for pip `requirements.txt` files."""
+
+    @property
+    def file_name(self):
+        return "requirements.txt"
+
+    def _parse_file(self, file: Path):
+        """Read `file` and return its requirements as a `PackageStore`."""
+        lines = file.read_text(encoding="utf-8").splitlines()
+
+        return PackageStore(
+            type=self.file_name,
+            file=str(file),
+            dependencies=self._parse_dependencies(lines),
+            # requirements.txt files do not declare py versions explicitly
+            # though we could create a heuristic by analyzing each dependency
+            # and extracting py versions from them.
+            py_versions=[],
+        )
diff --git a/src/codemodder/project_analysis/file_parsers/setup_cfg_file_parser.py b/src/codemodder/project_analysis/file_parsers/setup_cfg_file_parser.py
new file mode 100644
index 00000000..5f715681
--- /dev/null
+++ b/src/codemodder/project_analysis/file_parsers/setup_cfg_file_parser.py
@@ -0,0 +1,39 @@
+from codemodder.project_analysis.file_parsers.package_store import PackageStore
+from pathlib import Path
+import configparser
+
+from .base_parser import BaseParser
+
+
+class SetupCfgParser(BaseParser):
+    """Parser for setuptools `setup.cfg` files."""
+
+    @property
+    def file_name(self):
+        return "setup.cfg"
+
+    def _parse_dependencies_from_cfg(self, config: configparser.ConfigParser):
+        # todo: setup_requires, tests_require, extras_require
+        # The fallback covers both a missing [options] section and a missing
+        # install_requires key, so such files yield no dependencies.
+        install_requires = config.get("options", "install_requires", fallback="")
+        return self._parse_dependencies(install_requires.split("\n"))
+
+    def _parse_py_versions(self, config: configparser.ConfigParser):
+        # todo: handle multiple specifiers such as ">=3.11.1,<3.11.2"
+        python_requires = config.get("options", "python_requires", fallback="")
+        # No [options] section or no python_requires key: no versions declared
+        return [python_requires] if python_requires else []
+
+    def _parse_file(self, file: Path):
+        """Read `file` and return its dependency info as a `PackageStore`."""
+        config = configparser.ConfigParser()
+        # Read as UTF-8, like the requirements.txt and setup.py parsers do
+        config.read(file, encoding="utf-8")
+
+        return PackageStore(
+            type=self.file_name,
+            file=str(file),
+            dependencies=self._parse_dependencies_from_cfg(config),
+            py_versions=self._parse_py_versions(config),
+        )
diff --git a/src/codemodder/project_analysis/file_parsers/setup_py_file_parser.py b/src/codemodder/project_analysis/file_parsers/setup_py_file_parser.py
new file mode 100644
index 00000000..2fbd78d7
--- /dev/null
+++ b/src/codemodder/project_analysis/file_parsers/setup_py_file_parser.py
@@ -0,0 +1,101 @@
+from codemodder.project_analysis.file_parsers.package_store import PackageStore
+from pathlib import Path
+import libcst as cst
+from libcst import matchers
+from packaging.requirements import Requirement
+
+from .base_parser import BaseParser
+from .utils import clean_simplestring
+
+
+class SetupPyParser(BaseParser):
+    """Parser that reads a `setup.py` statically, without executing it."""
+
+    @property
+    def file_name(self):
+        return "setup.py"
+
+    def _parse_dependencies(self, dependencies):
+        """
+        Build a `Requirement` from each string literal in `dependencies`.
+
+        `dependencies` holds the libcst elements of an `install_requires`
+        list. Elements that are not plain string literals (names, calls,
+        concatenations) cannot be read statically and are skipped.
+        """
+        return [
+            Requirement(line)
+            for x in dependencies
+            if isinstance(x.value, cst.SimpleString)
+            # Skip empty lines and comments
+            and (line := clean_simplestring(x.value))
+            and not line.startswith("#")
+        ]
+
+    def _parse_dependencies_from_cst(self, cst_dependencies):
+        # The visitor reports None when setup() has no literal
+        # `install_requires`; treat that as "no dependencies".
+        return self._parse_dependencies(cst_dependencies or [])
+
+    def _parse_py_versions(self, version_str):
+        # todo: handle for multiple versions
+        # The visitor reports None when setup() has no literal
+        # `python_requires`; treat that as "no versions declared".
+        if version_str is None:
+            return []
+        return [clean_simplestring(version_str)]
+
+    def _parse_file(self, file: Path):
+        """Parse `file` and collect what its `setup()` call declares."""
+        visitor = SetupCallVisitor()
+        with open(str(file), "r", encoding="utf-8") as f:
+            # todo: handle failure in parsing
+            module = cst.parse_module(f.read())
+        module.visit(visitor)
+
+        return PackageStore(
+            type=self.file_name,
+            file=str(file),
+            dependencies=self._parse_dependencies_from_cst(visitor.install_requires),
+            py_versions=self._parse_py_versions(visitor.python_requires),
+        )
+
+
+class SetupCallVisitor(cst.CSTVisitor):
+    """Collect `python_requires` and `install_requires` from `setup()` calls."""
+
+    def __init__(self):
+        super().__init__()
+        self.python_requires = None
+        self.install_requires = None
+        # todo setup_requires, tests_require, extras_require
+
+    def visit_Call(self, node: cst.Call) -> None:
+        # todo: only handle setup from setuptools, not others tho unlikely
+        if matchers.matches(node.func, cst.Name(value="setup")):
+            visitor = SetupArgVisitor()
+            node.visit(visitor)
+            self.python_requires = visitor.python_requires
+            self.install_requires = visitor.install_requires
+
+
+class SetupArgVisitor(cst.CSTVisitor):
+    """Record the values of the keyword arguments this module cares about."""
+
+    def __init__(self):
+        super().__init__()
+        self.python_requires = None
+        self.install_requires = None
+
+    def visit_Arg(self, node: cst.Arg) -> None:
+        value = node.value
+        if matchers.matches(node.keyword, cst.Name(value="python_requires")):
+            # todo: this works for `python_requires=">=3.7",` but what about
+            # a list of versions?
+            # Only a string literal can be read without running the file.
+            if isinstance(value, cst.SimpleString):
+                self.python_requires = value.value
+        if matchers.matches(node.keyword, cst.Name(value="install_requires")):
+            # Likewise only a literal list or tuple exposes `.elements`.
+            if isinstance(value, (cst.List, cst.Tuple)):
+                self.install_requires = value.elements
diff --git a/src/codemodder/project_analysis/file_parsers/utils.py b/src/codemodder/project_analysis/file_parsers/utils.py
new file mode 100644
index 00000000..4513523e
--- /dev/null
+++ b/src/codemodder/project_analysis/file_parsers/utils.py
@@ -0,0 +1,13 @@
+import libcst as cst
+
+
+def clean_simplestring(node: cst.SimpleString | str) -> str:
+    """
+    Return the text of a string literal without its surrounding quotes.
+
+    `node` is either a libcst `SimpleString` or the literal's source text.
+    """
+    if isinstance(node, str):
+        # Source text may be written with single or double quotes
+        return node.strip("\"'")
+    return node.raw_value
diff --git a/src/codemodder/project_analysis/python_repo_manager.py b/src/codemodder/project_analysis/python_repo_manager.py
new file mode 100644
index 00000000..c73e4656
--- /dev/null
+++ b/src/codemodder/project_analysis/python_repo_manager.py
@@ -0,0 +1,35 @@
+from functools import cached_property
+from pathlib import Path
+from codemodder.project_analysis.file_parsers import (
+    RequirementsTxtParser,
+    PyprojectTomlParser,
+    SetupCfgParser,
+    SetupPyParser,
+)
+from codemodder.project_analysis.file_parsers.package_store import PackageStore
+
+
+class PythonRepoManager:
+    """Discover the dependency files of a Python project directory."""
+
+    def __init__(self, parent_directory: Path):
+        self.parent_directory = parent_directory
+        # parser classes tried, in order, when collecting package stores
+        self._potential_stores = [
+            RequirementsTxtParser,
+            PyprojectTomlParser,
+            SetupCfgParser,
+            SetupPyParser,
+        ]
+
+    @cached_property
+    def package_stores(self) -> list[PackageStore]:
+        """All package stores found in the project, parsed once and cached."""
+        return self._parse_all_stores()
+
+    def _parse_all_stores(self) -> list[PackageStore]:
+        """Run every parser over the project and concatenate their results."""
+        discovered_pkg_stores: list[PackageStore] = []
+        for parser in self._potential_stores:
+            discovered_pkg_stores.extend(parser(self.parent_directory).parse())
+        return discovered_pkg_stores
diff --git a/tests/codemods/base_codemod_test.py b/tests/codemods/base_codemod_test.py
index f98e10e1..5cdae259 100644
--- a/tests/codemods/base_codemod_test.py
+++ b/tests/codemods/base_codemod_test.py
@@ -33,6 +33,7 @@ def run_and_assert_filepath(self, root, file_path, input_code, expected):
             dry_run=True,
             verbose=False,
             registry=mock.MagicMock(),
+            repo_manager=mock.MagicMock(),
         )
self.file_context = FileContext( file_path, @@ -80,6 +81,7 @@ def run_and_assert_filepath(self, root, file_path, input_code, expected): dry_run=True, verbose=False, registry=mock.MagicMock(), + repo_manager=mock.MagicMock(), ) input_tree = cst.parse_module(input_code) all_results = self.results_by_id_filepath(input_code, file_path) diff --git a/tests/conftest.py b/tests/conftest.py index 22a7facd..c366bea1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -49,3 +49,12 @@ def disable_write_dependencies(): dm_write.start() yield dm_write.stop() + + +@pytest.fixture(scope="module") +def pkg_with_reqs_txt(tmp_path_factory): + base_dir = tmp_path_factory.mktemp("foo") + req_file = base_dir / "requirements.txt" + reqs = "# comment\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\n" + req_file.write_text(reqs) + return base_dir diff --git a/tests/project_analysis/__init__.py b/tests/project_analysis/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/project_analysis/file_parsers/__init__.py b/tests/project_analysis/file_parsers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/project_analysis/file_parsers/test_pyproject_toml_file_parser.py b/tests/project_analysis/file_parsers/test_pyproject_toml_file_parser.py new file mode 100644 index 00000000..39e92173 --- /dev/null +++ b/tests/project_analysis/file_parsers/test_pyproject_toml_file_parser.py @@ -0,0 +1,48 @@ +# pylint: disable=redefined-outer-name +import pytest +from codemodder.project_analysis.file_parsers import PyprojectTomlParser + + +@pytest.fixture(scope="module") +def pkg_with_pyproject_toml(tmp_path_factory): + base_dir = tmp_path_factory.mktemp("foo") + toml_file = base_dir / "pyproject.toml" + toml = """\ + [build-system] + requires = ["setuptools"] + build-backend = "setuptools.build_meta" + + [project] + name = "pkg for testing" + version = "0.60.0" + requires-python = ">=3.10.0" + readme = "README.md" + license = {file = "LICENSE"} + 
dependencies = [ + "isort~=5.12.0", + "libcst~=1.1.0", + "PyYAML~=6.0.0", + "semgrep<2", + "toml~=0.10.2", + "wrapt~=1.15.0", + ] + """ + toml_file.write_text(toml) + return base_dir + + +class TestPyprojectTomlParser: + def test_parse(self, pkg_with_pyproject_toml): + parser = PyprojectTomlParser(pkg_with_pyproject_toml) + found = parser.parse() + assert len(found) == 1 + store = found[0] + assert store.type == "pyproject.toml" + assert store.file == str(pkg_with_pyproject_toml / parser.file_name) + assert store.py_versions == [">=3.10.0"] + assert len(store.dependencies) == 6 + + def test_parse_no_file(self, pkg_with_pyproject_toml): + parser = PyprojectTomlParser(pkg_with_pyproject_toml / "foo") + found = parser.parse() + assert len(found) == 0 diff --git a/tests/project_analysis/file_parsers/test_requirements_txt_file_parser.py b/tests/project_analysis/file_parsers/test_requirements_txt_file_parser.py new file mode 100644 index 00000000..468e0b00 --- /dev/null +++ b/tests/project_analysis/file_parsers/test_requirements_txt_file_parser.py @@ -0,0 +1,18 @@ +from codemodder.project_analysis.file_parsers import RequirementsTxtParser + + +class TestRequirementsTxtParser: + def test_parse(self, pkg_with_reqs_txt): + parser = RequirementsTxtParser(pkg_with_reqs_txt) + found = parser.parse() + assert len(found) == 1 + store = found[0] + assert store.type == "requirements.txt" + assert store.file == str(pkg_with_reqs_txt / parser.file_name) + assert store.py_versions == [] + assert len(store.dependencies) == 4 + + def test_parse_no_file(self, pkg_with_reqs_txt): + parser = RequirementsTxtParser(pkg_with_reqs_txt / "foo") + found = parser.parse() + assert len(found) == 0 diff --git a/tests/project_analysis/file_parsers/test_setup_cfg_file_parser.py b/tests/project_analysis/file_parsers/test_setup_cfg_file_parser.py new file mode 100644 index 00000000..11a54490 --- /dev/null +++ b/tests/project_analysis/file_parsers/test_setup_cfg_file_parser.py @@ -0,0 +1,49 @@ +# 
pylint: disable=redefined-outer-name +import pytest +from codemodder.project_analysis.file_parsers import SetupCfgParser + + +@pytest.fixture(scope="module") +def pkg_with_setup_cfg(tmp_path_factory): + base_dir = tmp_path_factory.mktemp("foo") + setup_cfg = base_dir / "setup.cfg" + cfg = """\ + [metadata] + name = test pkg + version = 123 + author = Some Name + author_email = idk@gmail.com + description = My package description + license = BSD-3-Clause + classifiers = + Framework :: Django + Programming Language :: Python :: 3 + + [options] + zip_safe = False + include_package_data = True + packages = find: + python_requires = >=3.7 + install_requires = + requests + importlib-metadata; python_version<"3.8" + """ + setup_cfg.write_text(cfg) + return base_dir + + +class TestSetupCfgParser: + def test_parse(self, pkg_with_setup_cfg): + parser = SetupCfgParser(pkg_with_setup_cfg) + found = parser.parse() + assert len(found) == 1 + store = found[0] + assert store.type == "setup.cfg" + assert store.file == str(pkg_with_setup_cfg / parser.file_name) + assert store.py_versions == [">=3.7"] + assert len(store.dependencies) == 2 + + def test_parse_no_file(self, pkg_with_setup_cfg): + parser = SetupCfgParser(pkg_with_setup_cfg / "foo") + found = parser.parse() + assert len(found) == 0 diff --git a/tests/project_analysis/file_parsers/test_setup_py_file_parser.py b/tests/project_analysis/file_parsers/test_setup_py_file_parser.py new file mode 100644 index 00000000..bff062c7 --- /dev/null +++ b/tests/project_analysis/file_parsers/test_setup_py_file_parser.py @@ -0,0 +1,56 @@ +# pylint: disable=redefined-outer-name +import pytest +from codemodder.project_analysis.file_parsers import SetupPyParser + + +@pytest.fixture(scope="module") +def pkg_with_setup_py(tmp_path_factory): + base_dir = tmp_path_factory.mktemp("foo") + setup_py = base_dir / "setup.py" + data = """\ +# -*- coding: utf-8 -*- +# a comment +from sys import platform, version_info + +root_dir = 
path.abspath(path.dirname(__file__)) + +print(root_dir) + +setup( + name="test pkg", + description="testing", + long_description=read("README.md"), + # The project's main homepage. + # Author details + author="Pixee", + packages=find_packages("src"), + package_dir={"": "src"}, + python_requires=">3.6", + install_requires=[ + "protobuf>=3.12,<3.18; python_version < '3'", + "protobuf>=3.12,<4; python_version >= '3'", + "psutil>=5.7,<6", + "requests>=2.4.2,<3", + ], + entry_points={}, +) + """ + setup_py.write_text(data) + return base_dir + + +class TestSetupPyParser: + def test_parse(self, pkg_with_setup_py): + parser = SetupPyParser(pkg_with_setup_py) + found = parser.parse() + assert len(found) == 1 + store = found[0] + assert store.type == "setup.py" + assert store.file == str(pkg_with_setup_py / parser.file_name) + assert store.py_versions == [">3.6"] + assert len(store.dependencies) == 4 + + def test_parse_no_file(self, pkg_with_setup_py): + parser = SetupPyParser(pkg_with_setup_py / "foo") + found = parser.parse() + assert len(found) == 0 diff --git a/tests/project_analysis/test_python_repo_manager.py b/tests/project_analysis/test_python_repo_manager.py new file mode 100644 index 00000000..41730385 --- /dev/null +++ b/tests/project_analysis/test_python_repo_manager.py @@ -0,0 +1,8 @@ +from codemodder.project_analysis.python_repo_manager import PythonRepoManager + + +class TestPythonRepoManager: + def test_package_stores(self, pkg_with_reqs_txt): + rm = PythonRepoManager(pkg_with_reqs_txt) + stores = rm.package_stores + assert len(stores) == 1