Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement safer dependency parsing #159

Merged
merged 2 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 15 additions & 16 deletions src/codemodder/project_analysis/file_parsers/base_parser.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,25 @@
from abc import ABC, abstractmethod

from pathlib import Path
from typing import List
from .package_store import PackageStore
from packaging.requirements import Requirement

from codemodder.logging import logger
from .package_store import FileType, PackageStore


class BaseParser(ABC):
parent_directory: Path

def __init__(self, parent_directory: Path):
self.parent_directory = parent_directory

@property
@abstractmethod
def file_type(self):
... # pragma: no cover

def _parse_dependencies(self, dependencies: List[str]):
return [
Requirement(line)
for x in dependencies
# Skip empty lines and comments
if (line := x.strip()) and not line.startswith("#")
]
def file_type(self) -> FileType:
pass

Check warning on line 18 in src/codemodder/project_analysis/file_parsers/base_parser.py

View check run for this annotation

Codecov / codecov/patch

src/codemodder/project_analysis/file_parsers/base_parser.py#L18

Added line #L18 was not covered by tests

@abstractmethod
def _parse_file(self, file: Path):
... # pragma: no cover
def _parse_file(self, file: Path) -> PackageStore | None:
pass

Check warning on line 22 in src/codemodder/project_analysis/file_parsers/base_parser.py

View check run for this annotation

Codecov / codecov/patch

src/codemodder/project_analysis/file_parsers/base_parser.py#L22

Added line #L22 was not covered by tests

def find_file_locations(self) -> List[Path]:
    """Return all files below ``parent_directory`` matching this parser's file name.

    The search is recursive (``Path.rglob``) and uses ``self.file_type.value``
    as the glob pattern.
    """
    root = Path(self.parent_directory)
    pattern = self.file_type.value
    matches = root.rglob(pattern)
    return list(matches)
Expand All @@ -37,7 +31,12 @@
stores = []
req_files = self.find_file_locations()
for file in req_files:
store = self._parse_file(file)
try:
store = self._parse_file(file)
except Exception as e:
logger.debug("Error parsing file: %s", file, exc_info=e)
continue

if store:
stores.append(store)
return stores
23 changes: 20 additions & 3 deletions src/codemodder/project_analysis/file_parsers/package_store.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from dataclasses import dataclass
from enum import Enum
from packaging.requirements import Requirement
from pathlib import Path

from codemodder.dependency import Requirement


class FileType(Enum):
Expand All @@ -10,9 +12,24 @@ class FileType(Enum):
SETUP_CFG = "setup.cfg"


@dataclass
@dataclass(init=False)
class PackageStore:
type: FileType
file: str
file: Path
dependencies: set[Requirement]
py_versions: list[str]

def __init__(
    self,
    type: FileType,  # pylint: disable=redefined-builtin
    file: Path,
    dependencies: set[str | Requirement],
    py_versions: list[str],
):
    """Store the parsed file metadata, normalizing every dependency.

    Entries of ``dependencies`` that are already ``Requirement`` instances
    are kept as they are; anything else is passed to ``Requirement(...)``.
    The result is stored as a set.
    """
    self.type = type
    self.file = file

    normalized = set()
    for entry in dependencies:
        if not isinstance(entry, Requirement):
            entry = Requirement(entry)
        normalized.add(entry)
    self.dependencies = normalized

    self.py_versions = py_versions
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from pathlib import Path

import toml

from codemodder.project_analysis.file_parsers.package_store import (
PackageStore,
FileType,
)
from pathlib import Path
import toml

from .base_parser import BaseParser


Expand All @@ -13,25 +14,18 @@
def file_type(self):
return FileType.TOML

def _parse_dependencies_from_toml(self, toml_data: dict):
# todo: handle cases for
# 1. no dependencies
return self._parse_dependencies(toml_data["project"]["dependencies"])
def _parse_file(self, file: Path) -> PackageStore | None:
data = toml.load(file)

def _parse_py_versions(self, toml_data: dict) -> list:
# todo: handle cases for
# 1. multiple requires-python such as "">3.5.2"", ">=3.11.1,<3.11.2"
maybe_project = toml_data.get("project")
maybe_python = maybe_project.get("requires-python") if maybe_project else None
return [maybe_python] if maybe_python else []
if not (project := data.get("project")):
return None

Check warning on line 21 in src/codemodder/project_analysis/file_parsers/pyproject_toml_file_parser.py

View check run for this annotation

Codecov / codecov/patch

src/codemodder/project_analysis/file_parsers/pyproject_toml_file_parser.py#L21

Added line #L21 was not covered by tests

def _parse_file(self, file: Path):
data = toml.load(file)
# todo: handle no "project" in data
dependencies = project.get("dependencies", [])
version = project.get("requires-python", None)

return PackageStore(
type=self.file_type,
file=str(file),
dependencies=set(self._parse_dependencies_from_toml(data)),
py_versions=self._parse_py_versions(data),
file=file,
dependencies=set(dependencies),
py_versions=[version] if version else [],
)
Original file line number Diff line number Diff line change
@@ -1,41 +1,41 @@
from typing import Optional
from pathlib import Path

import chardet

from packaging.requirements import InvalidRequirement
from codemodder.logging import logger
from codemodder.project_analysis.file_parsers.package_store import (
PackageStore,
FileType,
)
from pathlib import Path
from .base_parser import BaseParser
import chardet
from codemodder.logging import logger


class RequirementsTxtParser(BaseParser):
@property
def file_type(self):
    """File type handled by this parser: ``FileType.REQ_TXT``."""
    return FileType.REQ_TXT

def _parse_file(self, file: Path) -> Optional[PackageStore]:
try:
with open(file, "rb") as f:
whole_file = f.read()
enc = chardet.detect(whole_file)
if enc["confidence"] > 0.9:
encoding = enc.get("encoding")
decoded = whole_file.decode(encoding.lower()) if encoding else ""
lines = decoded.splitlines() if decoded else []
else:
raise UnicodeError()
return PackageStore(
type=self.file_type,
file=str(file),
dependencies=set(self._parse_dependencies(lines)),
# requirements.txt files do not declare py versions explicitly
# though we could create a heuristic by analyzing each dependency
# and extracting py versions from them.
py_versions=[],
)
except (UnicodeError, OSError, InvalidRequirement):
logger.debug("Error parsing file: %s", file)
return None
def _parse_file(self, file: Path) -> PackageStore | None:
    """Parse a requirements.txt file into a ``PackageStore``.

    The file is read as bytes and decoded with the encoding reported by
    ``chardet``. When the detection confidence is not above 0.9 the file
    is skipped: a debug message is logged and ``None`` is returned.

    Blank lines and comment lines are ignored. Each line is stripped
    *before* it is tested, so neither an empty string nor an indented
    ``"   # note"`` line is handed to ``PackageStore`` as if it were a
    requirement specifier.
    """
    with open(file, "rb") as f:
        whole_file = f.read()

    enc = chardet.detect(whole_file)
    if enc["confidence"] > 0.9:
        encoding = enc.get("encoding")
        decoded = whole_file.decode(encoding.lower()) if encoding else ""
        lines = decoded.splitlines()
    else:
        logger.debug("Unknown encoding for file: %s", file)
        return None

    # Strip first, then filter: skips empty lines and comments, including
    # comments preceded by whitespace.
    dependencies = {
        stripped
        for line in lines
        if (stripped := line.strip()) and not stripped.startswith("#")
    }

    return PackageStore(
        type=self.file_type,
        file=file,
        dependencies=dependencies,
        # requirements.txt files do not declare py versions explicitly
        # though we could create a heuristic by analyzing each dependency
        # and extracting py versions from them.
        py_versions=[],
    )
Original file line number Diff line number Diff line change
Expand Up @@ -13,28 +13,19 @@
def file_type(self):
return FileType.SETUP_CFG

def _parse_dependencies_from_cfg(self, config: configparser.ConfigParser):
# todo: handle cases for
# 1. no dependencies, no options dict
# setup_requires, tests_require, extras_require
dependency_lines = config["options"]["install_requires"].split("\n")
return self._parse_dependencies(dependency_lines)

def _parse_py_versions(self, config: configparser.ConfigParser):
# todo: handle cases for
# 1. no options/ no requires-python
# 2. various requires-python such as "">3.5.2"", ">=3.11.1,<3.11.2"
return [config["options"]["python_requires"]]

def _parse_file(self, file: Path):
def _parse_file(self, file: Path) -> PackageStore | None:
config = configparser.ConfigParser()
config.read(file)

# todo: handle no config, no "options" in config
if not (options := config["options"]):
return None

Check warning on line 21 in src/codemodder/project_analysis/file_parsers/setup_cfg_file_parser.py

View check run for this annotation

Codecov / codecov/patch

src/codemodder/project_analysis/file_parsers/setup_cfg_file_parser.py#L21

Added line #L21 was not covered by tests

dependency_lines = options.get("install_requires", "").split("\n")
python_requires = options.get("python_requires", "")

return PackageStore(
type=self.file_type,
file=str(file),
dependencies=set(self._parse_dependencies_from_cfg(config)),
py_versions=self._parse_py_versions(config),
file=file,
dependencies=set(line for line in dependency_lines if line),
py_versions=[python_requires] if python_requires else [],
)
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@
from codemodder.utils.utils import clean_simplestring
from pathlib import Path
import libcst as cst
from libcst import matchers
from packaging.requirements import Requirement
from typing import Optional

from .base_parser import BaseParser

Expand All @@ -17,67 +14,59 @@ class SetupPyParser(BaseParser):
def file_type(self):
return FileType.SETUP_PY

def _parse_dependencies(self, dependencies):
return [
Requirement(line)
for x in dependencies
# Skip empty lines and comments
if (line := clean_simplestring(x.value)) and not line.startswith("#")
]

def _parse_dependencies_from_cst(self, cst_dependencies: Optional[list]):
return self._parse_dependencies(cst_dependencies) if cst_dependencies else []

def _parse_py_versions(self, version_str):
# todo: handle for multiple versions
return [clean_simplestring(version_str)]
def _parse_file(self, file: Path) -> PackageStore | None:
with open(file, "r", encoding="utf8") as f:
module = cst.parse_module(f.read())

def _parse_file(self, file: Path):
visitor = SetupCallVisitor()
with open(str(file), "r", encoding="utf-8") as f:
# todo: handle failure in parsing
module = cst.parse_module(f.read())
module.visit(visitor)

# todo: handle no python_requires, install_requires

return PackageStore(
type=self.file_type,
file=str(file),
dependencies=set(
self._parse_dependencies_from_cst(visitor.install_requires)
),
py_versions=self._parse_py_versions(visitor.python_requires),
file=file,
dependencies=set(visitor.install_requires),
py_versions=visitor.python_requires,
)


class SetupCallVisitor(cst.CSTVisitor):
python_requires: list[str]
install_requires: list[str]

def __init__(self):
self.python_requires = None
self.install_requires = None
# todo setup_requires, tests_require, extras_require
self.python_requires = []
self.install_requires = []
# TODO: setup_requires, tests_require, extras_require

def visit_Call(self, node: cst.Call) -> None:
# todo: only handle setup from setuptools, not others tho unlikely
if matchers.matches(node.func, cst.Name(value="setup")):
visitor = SetupArgVisitor()
node.visit(visitor)
self.python_requires = visitor.python_requires
self.install_requires = visitor.install_requires
# TODO: only handle setup from setuptools, not others tho unlikely
match node.func:
case cst.Name(value="setup"):
visitor = SetupArgVisitor()
node.visit(visitor)
self.python_requires.extend(visitor.python_requires)
self.install_requires.extend(visitor.install_requires)


class SetupArgVisitor(cst.CSTVisitor):
python_requires: list[str]
install_requires: list[str]

def __init__(self):
self.python_requires = None
self.install_requires = None
self.python_requires = []
self.install_requires = []

def visit_Arg(self, node: cst.Arg) -> None:
if matchers.matches(node.keyword, matchers.Name(value="python_requires")):
# todo: this works for `python_requires=">=3.7",` but what about
# a list of versions?
self.python_requires = node.value.value
if matchers.matches(
node.keyword, matchers.Name(value="install_requires")
) and matchers.matches(node.value, matchers.List()):
# todo: if node.value is Name node, find requirements in the variable at node.value
self.install_requires = node.value.elements
match node.keyword, node.value:
case cst.Name(value="python_requires"), cst.SimpleString() as string_node:
# TODO: this works for `python_requires=">=3.7",` but what about a list of versions?
self.python_requires.append(clean_simplestring(string_node.value))
case cst.Name(value="install_requires"), cst.List() as list_node:
for elm in list_node.elements:
match elm:
case cst.Element(value=cst.SimpleString() as string_node):
self.install_requires.append(
clean_simplestring(string_node.value)
)
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_parse(self, pkg_with_pyproject_toml):
assert len(found) == 1
store = found[0]
assert store.type.value == "pyproject.toml"
assert store.file == str(pkg_with_pyproject_toml / parser.file_type.value)
assert store.file == pkg_with_pyproject_toml / parser.file_type.value
assert store.py_versions == [">=3.10.0"]
assert len(store.dependencies) == 6

Expand All @@ -66,13 +66,20 @@ def test_parse_no_python(self, pkg_with_pyproject_toml_no_python):
assert len(found) == 1
store = found[0]
assert store.type.value == "pyproject.toml"
assert store.file == str(
pkg_with_pyproject_toml_no_python / parser.file_type.value
)
assert store.file == pkg_with_pyproject_toml_no_python / parser.file_type.value
assert store.py_versions == []
assert len(store.dependencies) == 1

def test_parse_no_file(self, pkg_with_pyproject_toml):
    """Parsing a directory that does not exist yields no stores."""
    missing_dir = pkg_with_pyproject_toml / "foo"
    found = PyprojectTomlParser(missing_dir).parse()
    assert len(found) == 0

def test_parser_error(self, pkg_with_pyproject_toml, mocker):
    """An exception raised by ``toml.load`` results in no stores, not a crash."""
    patch_target = (
        "codemodder.project_analysis.file_parsers.pyproject_toml_file_parser.toml.load"
    )
    mocker.patch(patch_target, side_effect=Exception)
    found = PyprojectTomlParser(pkg_with_pyproject_toml).parse()
    assert len(found) == 0
Loading
Loading