Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restrict license checks to staged/committed files only #106

Merged
merged 2 commits into from
Nov 28, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 104 additions & 43 deletions scripts/pre-commit-license-check.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,57 +2,118 @@
#
# SPDX-License-Identifier: Apache-2.0
#!/usr/bin/env python3
import json
import os
import re
import subprocess
import sys
from pathlib import Path
from typing import List

REQUIRED_ELEMENTS = [r"Copyright.*Owners of https://github\.com/ag2ai", r"SPDX-License-Identifier: Apache-2\.0"]


def get_github_pr_files() -> List[Path]:
    """Get list of Python files changed in a GitHub PR or push.

    Reads the event payload named by ``GITHUB_EVENT_PATH``. For
    ``pull_request`` events the changed files are taken from the payload when
    it carries a list of ``{"filename": ...}`` entries. GitHub's own payload
    reports ``changed_files`` as an integer count, so in that case the files
    are computed with ``git diff`` between the PR base and head commits.
    Every other event diffs ``HEAD^`` against ``HEAD``.

    Returns:
        Paths whose name ends in ``.py``. An empty list is returned when
        ``GITHUB_EVENT_PATH`` is unset or when any step fails (the error is
        printed).
    """
    try:
        event_path = os.getenv("GITHUB_EVENT_PATH")
        if not event_path:
            # Honour the declared return type instead of falling through to None.
            return []

        with open(event_path, encoding="utf-8") as f:
            event = json.load(f)

        # Default revision range, used for push events and as a last resort for PRs.
        diff_args = ["HEAD^", "HEAD"]

        if os.getenv("GITHUB_EVENT_NAME") == "pull_request":
            pull_request = event.get("pull_request") or {}
            listed = pull_request.get("changed_files")

            # Only iterate the payload value when it really is a list of file
            # entries; iterating the integer count would raise TypeError and
            # silently skip every file in the PR.
            if isinstance(listed, list):
                names = [entry.get("filename", "") for entry in listed]
                return [Path(name) for name in names if name.endswith(".py")]

            base_sha = (pull_request.get("base") or {}).get("sha")
            head_sha = (pull_request.get("head") or {}).get("sha")
            if base_sha and head_sha:
                # Three-dot range: changes on the PR branch since its merge base.
                diff_args = [f"{base_sha}...{head_sha}"]

        result = subprocess.run(
            ["git", "diff", "--name-only", *diff_args], capture_output=True, text=True, check=True
        )
        return [Path(file) for file in result.stdout.splitlines() if file.endswith(".py")]
    except Exception as e:
        # Best-effort: report the problem and let the caller see "no files".
        print(f"Error getting PR files: {e}")
        return []


def get_staged_files() -> List[Path]:
    """Return the staged Python files reported by ``git diff --cached``.

    Only added, modified or renamed entries are requested
    (``--diff-filter=AMR``). If git exits with a non-zero status the error is
    printed and an empty list is returned.
    """
    git_command = ["git", "diff", "--cached", "--name-only", "--diff-filter=AMR"]
    try:
        completed = subprocess.run(git_command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error getting staged files: {e}")
        return []

    staged_python_files = []
    for name in completed.stdout.splitlines():
        if name.endswith(".py"):
            staged_python_files.append(Path(name))
    return staged_python_files


def should_check_file(file_path: Path) -> bool:
    """Return True for every file except one named ``__init__.py``."""
    is_package_init = file_path.name == "__init__.py"
    return not is_package_init


def check_file_header(file_path):
    """Return the REQUIRED_ELEMENTS patterns that are absent from the file header.

    Only the first 500 characters of the file are read, and each pattern is
    searched case-insensitively. An empty list means every required element
    was found.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        # The license header is expected near the top, so a short prefix is enough.
        header = handle.read(500)

    return [required for required in REQUIRED_ELEMENTS if re.search(required, header, re.IGNORECASE) is None]


def main():
    """Check every ``*.py`` file under the current directory for a license header.

    Files rejected by ``should_check_file`` are skipped. Exits with status 1
    if any checked file is missing a required element, otherwise with 0.
    """
    any_missing = False
    for candidate in Path(".").rglob("*.py"):
        if should_check_file(candidate) and check_file_header(candidate):
            any_missing = True
            print(f"\nIncomplete or missing license header in: {candidate}")
            print(
                "\nSee https://ag2ai.github.io/ag2/docs/contributor-guide/contributing/#license-headers for guidance."
            )

            # For more detailed output:
            # print("Missing required elements:")
            # for element in missing_elements:
            #     print(f"  - {element}")
            # print("\nHeader should contain:")
            # print("  1. Copyright notice with 'Owners of https://github.com/ag2ai'")
            # print("  2. SPDX-License-Identifier: Apache-2.0")

    sys.exit(int(any_missing))
"""Skip __init__.py files and check if file exists."""
return file_path.name != "__init__.py" and file_path.exists()


def check_file_header(file_path: Path) -> List[str]:
    """Return the REQUIRED_ELEMENTS patterns missing from the start of the file.

    The first 500 characters are read and each pattern is searched
    case-insensitively. If anything goes wrong while processing the file, the
    error is printed and an empty list is returned.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            header = handle.read(500)
        return [required for required in REQUIRED_ELEMENTS if re.search(required, header, re.IGNORECASE) is None]
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return []


def get_files_to_check() -> List[Path]:
    """Pick the set of Python files to check for the current invocation.

    * ``--all-files`` on the command line: every ``*.py`` under the current
      directory.
    * ``GITHUB_ACTIONS`` equal to ``"true"``: the result of
      ``get_github_pr_files``.
    * otherwise: the result of ``get_staged_files``.

    Any exception is printed and results in an empty list.
    """
    try:
        scan_everything = "--all-files" in sys.argv
        if scan_everything:
            return [*Path(".").rglob("*.py")]

        running_in_ci = os.getenv("GITHUB_ACTIONS") == "true"
        return get_github_pr_files() if running_in_ci else get_staged_files()
    except Exception as e:
        print(f"Error getting files to check: {e}")
        return []


def main() -> None:
    """Run the license-header check over the files chosen by ``get_files_to_check``.

    Prints a notice and returns when there is nothing to check. Otherwise
    exits with status 1 if any checked file is missing a required element and
    0 if none are. An unexpected exception is printed and also exits with
    status 1.
    """
    try:
        candidates = get_files_to_check()
        if not candidates:
            print("No Python files to check")
            return

        offenders = 0
        for candidate in candidates:
            # Skipped files are never opened; the header check only runs for eligible ones.
            if should_check_file(candidate) and check_file_header(candidate):
                offenders += 1
                print(f"\nIncomplete or missing license header in: {candidate}")
                print(
                    "\nSee https://ag2ai.github.io/ag2/docs/contributor-guide/contributing/#license-headers for guidance."
                )

        sys.exit(1 if offenders else 0)
    except Exception as e:
        print(f"Error in main: {e}")
        sys.exit(1)


if __name__ == "__main__":
Expand Down
Loading