Skip to content

Commit

Permalink
build: add dependency dashboard script
Browse files Browse the repository at this point in the history
  • Loading branch information
UsamaSadiq committed Jan 10, 2024
1 parent 545461c commit 87d8c61
Show file tree
Hide file tree
Showing 6 changed files with 217 additions and 8 deletions.
4 changes: 2 additions & 2 deletions repo_health/check_python_support_releases.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ def fixture_repo_release_tags(repo_path):
@pytest.mark.py_dependency_health
def check_python_support_releases(repo_release_tags, all_results, repo_path):
"""
Check to see the python version releases for 3.8, 3.9, 3.10, 3.11
Check to see the python version releases for 3.8, 3.9, 3.10, 3.11, 3.12
"""
if not repo_release_tags:
all_results[MODULE_DICT_KEY] = {}
print("There is not tag found")
return
python_versions = ['3.8', '3.9', '3.10', '3.11']
python_versions = ['3.8', '3.9', '3.10', '3.11', '3.12']

Check warning on line 36 in repo_health/check_python_support_releases.py

View check run for this annotation

Codecov / codecov/patch

repo_health/check_python_support_releases.py#L36

Added line #L36 was not covered by tests
all_results[MODULE_DICT_KEY] = {}
desc_tags_list = list(reversed(repo_release_tags))
for version in python_versions:
Expand Down
2 changes: 1 addition & 1 deletion repo_health/check_setup_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def check_repo_url(setup_py, setup_cfg, all_results):
cfg_urls = re.findall(r"""(?m)^url\s*=\s*(\S+)""", setup_cfg)
urls = py_urls + cfg_urls
if urls:
assert len(urls) == 1
assert len(urls) > 0
all_results[module_dict_key]["repo_url"] = urls[0]


Expand Down
3 changes: 2 additions & 1 deletion repo_health/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ def get_release_tags(repo_dir):
try:
subprocess.run(['git', 'fetch', '--tags'], cwd=repo_dir, check=True)
git_tags = subprocess.check_output(['git', 'tag', '--sort=version:refname'], cwd=repo_dir, text=True)
all_tags_list = git_tags.strip().split('\n')
# Filter out empty or whitespace-only entries (e.g. when the repository has no tags)
all_tags_list = [tag for tag in git_tags.strip().split('\n') if tag.strip()]

Check warning on line 130 in repo_health/utils.py

View check run for this annotation

Codecov / codecov/patch

repo_health/utils.py#L130

Added line #L130 was not covered by tests
latest_tag = get_latest_release_tag(repo_dir)

if not latest_tag and len(all_tags_list):
Expand Down
12 changes: 12 additions & 0 deletions repo_health_dashboard/dependencies_configuration.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Dashboard configuration for the "py_dependency_health" sheet.
py_dependency_health:
  # Checks listed for this sheet (presumably also the column order on the
  # dashboard -- confirm against repo_health_dashboard utils).
  check_order:
    - python.3.8
    - python.3.9
    - python.3.10
    - python.3.11
    # check_python_support_releases now also reports 3.12; key format assumed
    # to follow the existing python.<version> entries.
    - python.3.12
    - django.has_django
    - django.4.0
    - django.4.1
    - django.4.2
  # Maps a check key to the alias used for it (presumably the display name).
  key_aliases:
    django.has_django: has_django
17 changes: 13 additions & 4 deletions repo_health_dashboard/repo_health_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ def main():
Create basic dashboard
"""
parser = argparse.ArgumentParser(description="Create basic dashboard")
parser.add_argument(
"--dashboard-name",
help="name of dashboard to trigger e.g. repo_health, dependencies_health",
dest="dashboard_name",
default="repo_health",
)
parser.add_argument(
"--data-dir",
help="location of where data yaml files are located",
Expand Down Expand Up @@ -53,8 +59,9 @@ def main():
)
args = parser.parse_args()
# collect configurations if they were input
configuration_name = "py_dependency_health" if args.dashboard_name == 'py_dependency_health' else "main"
configurations = {
"main": {"check_order": [], "repo_name_order": [], "key_aliases": {}}
configuration_name: {"check_order": [], "repo_name_order": [], "key_aliases": {}}
}
if args.configuration:
with codecs.open(args.configuration, "r", "utf-8") as f:
Expand All @@ -65,7 +72,8 @@ def main():
configurations[sheet] = utils.get_sheets(parsed_file_data, sheet)

data_dir = os.path.abspath(args.data_dir)
files = glob.glob(os.path.join(data_dir, "*/*.yaml"), recursive=False)
data_files_pattern = "*/*.yaml" if args.dashboard_name == "repo_health" else "*.yaml"
files = glob.glob(os.path.join(data_dir, data_files_pattern), recursive=False)
data = {}
for file_path in files:
file_name = file_path[file_path.rfind("/") + 1:]
Expand All @@ -89,8 +97,9 @@ def main():
utils.write_squashed_metadata_to_csv(
output, args.output_csv + "_" + key, configuration, args.append
)
utils.write_squashed_metadata_to_sqlite(
output, f"dashboard_{key}", configuration, args.output_sqlite)
if args.dashboard_name == "repo_health":
utils.write_squashed_metadata_to_sqlite(
output, f"dashboard_{key}", configuration, args.output_sqlite)


if __name__ == "__main__":
Expand Down
187 changes: 187 additions & 0 deletions scripts/dependencies-health-script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
#!/bin/bash
set -e -v

# Click needs a UTF-8 locale; it does not cope with Python 3's ASCII default.
export LC_ALL=C.UTF-8 LANG=C.UTF-8

WORKSPACE="${PWD}"

# A non-empty REPORT_DATE is normalised to YYYY-MM-DD so later comparisons
# and git date filters see one consistent format.
if [[ -n "${REPORT_DATE}" ]]; then
    REPORT_DATE="$(date -d "${REPORT_DATE}" '+%Y-%m-%d')"
fi

###############################################################
# Get list of dependencies repos from the dependencies_urls.csv
###############################################################

cd "$WORKSPACE"
touch "repositories.txt"

# Starting at the third line of the CSV, take the second comma-separated
# column and keep only the entries that are http:// or https:// URLs.
# Under `set -e` the script stops here when no URL matches (grep exits 1).
repo_urls=$(
    tail -n +3 "${WORKSPACE}/dashboards/dependencies_urls.csv" \
        | cut -d ',' -f 2 \
        | grep -E '^https?://'
)

# Persist the URLs, one per line, for the clone loop to read.
printf '%s\n' "${repo_urls}" > "repositories.txt"

#########################################
# Run dependencies checks on repositories
#########################################

# Settings shared by the per-repository loop and the commit step.
METADATA_FILE_DIST="docs/checks_metadata.yaml"
OUTPUT_FILE_POSTFIX="_repo_health.yaml"

# Collects the org/repo names of repositories that could not be processed.
failed_repos=()

# Install the checks and the dashboarding script; this should also pull in
# pytest-repo-health.
pip-sync -q edx-repo-health/requirements/base.txt
pip install -q -e edx-repo-health

# Git clone each dependency repo and run checks on it

# Directory (relative to WORKSPACE) receiving one YAML result file per repo.
DEPENDENCIES_DATA_DIR="dependencies_health_data"
# Create it up front so it exists even if every repository gets skipped.
mkdir -p "${WORKSPACE}/${DEPENDENCIES_DATA_DIR}"

# Matches SSH or HTTPS GitHub URLs; group 2 is the org, group 3 the repo.
# Plain `+` is used because POSIX ERE has no non-greedy `+?` quantifier.
github_url_re='^(git@github\.com:|https://github\.com/)([a-zA-Z0-9_.-]+)/([a-zA-Z0-9_.-]+)$'

# Run the py_dependency_health checks against ./target-repo.
# Must be called from WORKSPACE; reads OUTPUT_FILE_NAME at call time.
run_dependencies_health_checks() {
    pytest -m py_dependency_health --repo-health \
        --repo-health-path "edx-repo-health/repo_health" \
        --repo-path "target-repo" \
        --repo-health-metadata "${METADATA_FILE_DIST}" \
        --output-path "${DEPENDENCIES_DATA_DIR}/${OUTPUT_FILE_NAME}" \
        -o log_cli=true --exitfirst --noconftest -v -c /dev/null
}

input="repositories.txt"
while IFS= read -r line; do
    cd "$WORKSPACE"

    if [[ "${line}" =~ ${github_url_re} ]]; then
        ORG_NAME="${BASH_REMATCH[2]}"
        # Drop a trailing .git (if any): only the bare repo name is wanted.
        REPO_NAME="${BASH_REMATCH[3]%.git}"
        FULL_NAME="${ORG_NAME}/${REPO_NAME}"
    else
        echo "Skipping <${line}>: Could not recognize as a GitHub URL in order to extract org and repo name."
        continue
    fi

    if [[ "${REPO_NAME}" = "edx-repo-health" ]]; then
        echo "Skipping <${line}>: edx-repo-health"
        continue
    fi

    if [[ -n "${ONLY_CHECK_THIS_REPOSITORY}" && "${FULL_NAME}" != "${ONLY_CHECK_THIS_REPOSITORY}" ]]; then
        echo "Skipping <${line}>: ONLY_CHECK_THIS_REPOSITORY was set, and does not match"
        continue
    fi

    echo "Processing repo: ${FULL_NAME}"

    rm -rf target-repo
    # For https URLs the token is injected as the user part of the URL.
    if ! git clone -- "${line/https:\/\//https:\/\/$GITHUB_TOKEN@}" target-repo; then
        failed_repos+=("$FULL_NAME")
        continue
    fi

    echo "Cloned repo: ${FULL_NAME}"
    cd target-repo
    echo "Stepping into target-repo"

    # If a specific report date was requested, rewind the clone to the last
    # commit before that date.
    if [[ -n $REPORT_DATE ]]; then
        FIRST_COMMIT=$(git log --reverse --format="format:%ci" | sed -n 1p)
        if [[ $REPORT_DATE > ${FIRST_COMMIT:0:10} ]]; then
            # HEAD is whatever branch the fresh clone checked out, so this
            # also works for repositories whose default branch is not "master".
            # A failure is recorded per repo instead of aborting the whole run.
            if ! git checkout "$(git rev-list -n 1 --before="${REPORT_DATE} 00:00" HEAD)"; then
                echo "Could not check out ${FULL_NAME} as of ${REPORT_DATE}"
                failed_repos+=("$FULL_NAME")
                continue
            fi
        else
            echo "${REPO_NAME} doesn't have any commits prior to ${REPORT_DATE}"
            failed_repos+=("$FULL_NAME")
            continue
        fi
    fi

    cd "$WORKSPACE"
    OUTPUT_FILE_NAME="${REPO_NAME}${OUTPUT_FILE_POSTFIX}"

    # Give the checks a second attempt before recording the repo as failed.
    if ! run_dependencies_health_checks && ! run_dependencies_health_checks; then
        failed_repos+=("$FULL_NAME")
    fi

done < "$input"

##############################
# Recalculate aggregated data.
##############################

# Join the failed repository names with commas.
# NOTE: IFS is left set to "," for the remainder of the script.
IFS=,
failed_repo_names="${failed_repos[*]}"

echo "Pushing data"
# Build the dashboard CSV from the per-repository YAML files in the data dir.
cd "${WORKSPACE}/dependencies_health_data"
repo_health_dashboard \
    --data-dir . \
    --configuration "${WORKSPACE}/edx-repo-health/repo_health_dashboard/dependencies_configuration.yaml" \
    --output-csv "${WORKSPACE}/dashboards/dashboard" \
    --dashboard-name "py_dependency_health"

cd "${WORKSPACE}"
# Only commit the data if running with master and no REPORT_DATE is set.
if [[ ${EDX_REPO_HEALTH_BRANCH} == 'master' && -z ${REPORT_DATE} ]]; then
    ###########################################
    # Commit files and push to repo-health-data
    ###########################################
    echo "Commit new files and push to master..."

    commit_message="chore: Update repo health data files"

    if [[ ${#failed_repos[@]} -ne 0 ]]; then
        # $'\n' produces real newlines; "\n" inside double quotes would end
        # up in the commit message as a literal backslash-n.
        commit_message+=$'\n\n'"Following repos failed repo health checks"$'\n'" ${failed_repo_names}"

        for full_name in "${failed_repos[@]}"; do
            # failed_repos holds org/repo, but the data files are named after
            # the repository only, so strip the org prefix.
            OUTPUT_FILE_NAME="${full_name##*/}${OUTPUT_FILE_POSTFIX}"
            data_file="${WORKSPACE}/dependencies_health_data/${OUTPUT_FILE_NAME}"
            echo "reverting repo health data for ${OUTPUT_FILE_NAME}"
            if git ls-files --error-unmatch -- "${data_file}" > /dev/null 2>&1; then
                # Tracked file: restore the last committed version.
                git checkout -- "${data_file}"
            else
                # Never committed: discard whatever the failed run left behind.
                rm -f -- "${data_file}"
            fi
        done
    fi

    git checkout master

    # Stage first so that brand-new (untracked) data files are detected too;
    # then compare the index, not the working tree, against HEAD.
    git add dashboards
    git add dependencies_health_data
    if git diff-index --quiet --cached HEAD; then
        # Nothing staged
        echo "No changes to commit"
    else
        git config --global user.name "Repo Health Bot"
        git config --global user.email "${GITHUB_USER_EMAIL}"
        git commit -m "${commit_message}"
        git push origin master
    fi
fi

# Fail the job with a short runbook when any repository could not be checked.
if (( ${#failed_repos[@]} != 0 )); then
    # Joined with the first character of the IFS in effect at this point.
    failed_list="${failed_repos[*]}"
    printf '%s\n' \
        "" \
        "" \
        "TLDR Runbook(More detailed runbook: https://openedx.atlassian.net/wiki/spaces/AT/pages/3229057351/Repo+Health+Runbook ):" \
        " To resolve, search the console output for 'ERRORS' (without the quotes), or search for any" \
        " of the failed repo names listed below." \
        "The following repositories failed while executing pytest dependencies-health scripts causing the job to fail:" \
        "" \
        " ${failed_list}" \
        "" \
        ""
    exit 1
fi

0 comments on commit 87d8c61

Please sign in to comment.