Skip to content

Commit

Permalink
chore: add script to find python dependencies
Browse files Browse the repository at this point in the history
  • Loading branch information
salman2013 committed Apr 24, 2024
1 parent b14cbaa commit c7dd50b
Showing 1 changed file with 94 additions and 0 deletions.
94 changes: 94 additions & 0 deletions edx_repo_tools/find_dependencies/find_python_dependencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""
Spider and catalog dependencies.
$ python find_python_dependencies.py $FILE_PATH
"""

import json
import os
import requirements
import sys
from pathlib import Path
import requests


# Candidate requirements files, in priority order: the first one that exists
# is the file that gets examined.
PY_REQS = [
    "requirements/edx/base.txt",
    "requirements/base.txt",
    "requirements.txt",
]

# Marker files: the presence of any one of them suggests the repo is a
# Python project.
PY_INDICATORS = [
    "setup.py",
    "setup.cfg",
    "pyproject.toml",
]

def find_py_reqs():
    """
    Locate the requirements file to examine, relative to the current directory.

    Returns the first entry of ``PY_REQS`` that exists, as a ``Path``.
    When none exists, returns ``None``; in that case a warning is printed if
    any of the ``PY_INDICATORS`` files is present.
    """
    candidates = (Path(name) for name in PY_REQS)
    found = next((path for path in candidates if path.exists()), None)
    if found is not None:
        return found

    # No requirements file: check whether this still looks like a Python
    # package so the user gets a hint that something was missed.
    looks_like_package = False
    for indicator in PY_INDICATORS:
        if Path(indicator).exists():
            looks_like_package = True
            break

    if looks_like_package:
        print(f"WARNING: {os.getcwd()} is likely a Python package, but we can't find its dependencies.")
    return None

def request_package_info_url(package, timeout=10):
    """
    Look up the home page URL that PyPI records for `package`.

    Fetches ``https://pypi.org/pypi/<package>/json`` and reads the
    ``home_page`` entry of the ``info`` object in the response.

    Args:
        package: Name of the package to look up.
        timeout: Seconds to wait for PyPI before giving up.

    Returns:
        The ``home_page`` value from the response, or ``None`` when the
        request fails, PyPI answers with a non-200 status, the body is not
        valid JSON, or the response has no home page entry.
    """
    base_url = "https://pypi.org/pypi/"
    url = f"{base_url}{package}/json"

    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to retrieve data for package {package}. Error:", exc)
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve data for package {package}. Status code:", response.status_code)
        return None

    try:
        data_dict = response.json()
    except ValueError as exc:
        # Invalid JSON surfaces as a ValueError subclass.
        print(f"Failed to retrieve data for package {package}. Error:", exc)
        return None

    # Tolerate a missing or null "info" object instead of raising KeyError.
    info = data_dict.get("info") or {}
    return info.get("home_page")

# Organization names treated as first-party.
FIRST_PARTY_ORGS = [
    "openedx",
]

# Organization names treated as second-party.
SECOND_PARTY_ORGS = [
    "edx",
    "edx-unsupported",
    "edx-solutions",
    "mitodl",
    "overhangio",
    "open-craft",
    "eduNEXT",
    "raccoongang",
]

def urls_in_orgs(urls, orgs):
    """
    Return, sorted, the `urls` that contain ``/<org>/`` for any of `orgs`.

    Matching is a plain, case-sensitive substring test, so the organization
    name must appear as a complete path segment followed by a slash.
    """
    def in_some_org(url):
        for org in orgs:
            if f"/{org}/" in url:
                return True
        return False

    selected = [url for url in urls if in_some_org(url)]
    selected.sort()
    return selected

def main(dirs=None, org=None):
    """
    Report the second-party dependencies listed in requirements directories.

    For every directory, ``<dir>/base.txt`` is parsed with
    ``requirements.parse``. Each requirement's home page is looked up with
    ``request_package_info_url`` and kept when ``urls_in_orgs`` finds it in
    one of ``SECOND_PARTY_ORGS``. The collected home pages are printed at
    the end.

    Args:
        dirs: A directory path, or an iterable of directory paths, to
            examine. When ``None``, the command-line arguments
            (``sys.argv[1:]``) are used. An argument containing newlines is
            treated as one directory per line.
        org: Unused; accepted for backward compatibility.

    Exits with status 1 when at least one second-party package was found,
    and with status 2 when no directory was given. Otherwise returns
    ``None``.
    """
    if dirs is None:
        dirs = sys.argv[1:]
    elif isinstance(dirs, (str, os.PathLike)):
        # A single path must not be iterated character by character.
        dirs = [dirs]

    repo_dirs = [
        line.strip()
        for arg in dirs
        for line in os.fspath(arg).splitlines()
        if line.strip()
    ]
    if not repo_dirs:
        print("Usage: python find_python_dependencies.py DIR [DIR ...]", file=sys.stderr)
        # Status 2 keeps usage errors distinct from "second-party found" (1).
        sys.exit(2)

    packages_url = []
    for repo_dir in repo_dirs:
        with open(Path(repo_dir) / "base.txt", encoding="utf-8") as fbase:
            # Each parsed requirement names one package to look up.
            for req in requirements.parse(fbase):
                if not req.name:
                    # NOTE(review): nameless entries (presumably URL/VCS
                    # requirements) cannot be looked up on PyPI; skip them
                    # rather than querying for the package "None".
                    continue
                print(req.name)
                home_page = request_package_info_url(req.name)
                if home_page is not None and urls_in_orgs([home_page], SECOND_PARTY_ORGS):
                    packages_url.append(home_page)

    print("== DONE ==============")
    print("Second-party:")
    print("\n".join(packages_url))

    if packages_url:
        sys.exit(1)

# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

0 comments on commit c7dd50b

Please sign in to comment.