build: add dependency dashboard script

openedx · Jan 10, 2024 · 87d8c61 · 87d8c61
1 parent 545461c
commit 87d8c61
Show file tree

Hide file tree

Showing 6 changed files with 217 additions and 8 deletions.
diff --git a/repo_health/check_python_support_releases.py b/repo_health/check_python_support_releases.py
@@ -27,13 +27,13 @@ def fixture_repo_release_tags(repo_path):
 @pytest.mark.py_dependency_health
 def check_python_support_releases(repo_release_tags, all_results, repo_path):
     """
-    Check to see the python version releases for 3.8, 3.9, 3.10, 3.11
+    Check to see the python version releases for 3.8, 3.9, 3.10, 3.11, 3.12
     """
     if not repo_release_tags:
         all_results[MODULE_DICT_KEY] = {}
         print("There is not tag found")
         return
-    python_versions = ['3.8', '3.9', '3.10', '3.11']
+    python_versions = ['3.8', '3.9', '3.10', '3.11', '3.12']
     all_results[MODULE_DICT_KEY] = {}
     desc_tags_list = list(reversed(repo_release_tags))
     for version in python_versions:

diff --git a/repo_health/check_setup_py.py b/repo_health/check_setup_py.py
@@ -87,7 +87,7 @@ def check_repo_url(setup_py, setup_cfg, all_results):
     cfg_urls = re.findall(r"""(?m)^url\s*=\s*(\S+)""", setup_cfg)
     urls = py_urls + cfg_urls
     if urls:
-        assert len(urls) == 1
+        assert len(urls) > 0
         all_results[module_dict_key]["repo_url"] = urls[0]
 
 

diff --git a/repo_health/utils.py b/repo_health/utils.py
@@ -126,7 +126,8 @@ def get_release_tags(repo_dir):
     try:
         subprocess.run(['git', 'fetch', '--tags'], cwd=repo_dir, check=True)
         git_tags = subprocess.check_output(['git', 'tag', '--sort=version:refname'], cwd=repo_dir, text=True)
-        all_tags_list = git_tags.strip().split('\n')
+        # Drop empty or whitespace-only entries (an empty `git tag` output splits into [''])
+        all_tags_list = [tag for tag in git_tags.strip().split('\n') if tag.strip()]
         latest_tag = get_latest_release_tag(repo_dir)
 
         if not latest_tag and len(all_tags_list):

diff --git a/repo_health_dashboard/dependencies_configuration.yaml b/repo_health_dashboard/dependencies_configuration.yaml
@@ -0,0 +1,12 @@
+py_dependency_health:
+    check_order:
+        - python.3.8
+        - python.3.9
+        - python.3.10
+        - python.3.11
+        - django.has_django
+        - django.4.0
+        - django.4.1
+        - django.4.2
+    key_aliases:
+        django.has_django: has_django
diff --git a/repo_health_dashboard/repo_health_dashboard.py b/repo_health_dashboard/repo_health_dashboard.py
@@ -18,6 +18,12 @@ def main():
     Create basic dashboard
     """
     parser = argparse.ArgumentParser(description="Create basic dashboard")
+    parser.add_argument(
+        "--dashboard-name",
+        help="name of dashboard to trigger e.g. repo_health, dependencies_health",
+        dest="dashboard_name",
+        default="repo_health",
+    )
     parser.add_argument(
         "--data-dir",
         help="location of where data yaml files are located",
@@ -53,8 +59,9 @@ def main():
     )
     args = parser.parse_args()
     # collect configurations if they were input
+    configuration_name = "py_dependency_health" if args.dashboard_name == 'py_dependency_health' else "main"
     configurations = {
-        "main": {"check_order": [], "repo_name_order": [], "key_aliases": {}}
+        configuration_name: {"check_order": [], "repo_name_order": [], "key_aliases": {}}
     }
     if args.configuration:
         with codecs.open(args.configuration, "r", "utf-8") as f:
@@ -65,7 +72,8 @@ def main():
                 configurations[sheet] = utils.get_sheets(parsed_file_data, sheet)
 
     data_dir = os.path.abspath(args.data_dir)
-    files = glob.glob(os.path.join(data_dir, "*/*.yaml"), recursive=False)
+    data_files_pattern = "*/*.yaml" if args.dashboard_name == "repo_health" else "*.yaml"
+    files = glob.glob(os.path.join(data_dir, data_files_pattern), recursive=False)
     data = {}
     for file_path in files:
         file_name = file_path[file_path.rfind("/") + 1:]
@@ -89,8 +97,9 @@ def main():
         utils.write_squashed_metadata_to_csv(
             output, args.output_csv + "_" + key, configuration, args.append
         )
-        utils.write_squashed_metadata_to_sqlite(
-            output, f"dashboard_{key}", configuration, args.output_sqlite)
+        if args.dashboard_name == "repo_health":
+            utils.write_squashed_metadata_to_sqlite(
+                output, f"dashboard_{key}", configuration, args.output_sqlite)
 
 
 if __name__ == "__main__":

diff --git a/scripts/dependencies-health-script.sh b/scripts/dependencies-health-script.sh
@@ -0,0 +1,187 @@
+#!/bin/bash
+set -e -v
+
+# Click requires this to work cause it interfaces weirdly with python 3 ASCII default
+export LC_ALL=C.UTF-8
+export LANG=C.UTF-8
+
+WORKSPACE=$PWD
+
+# If the REPORT_DATE variable is set and not an empty string parse the date to standardize it.
+if [[ -n $REPORT_DATE ]]; then 
+    REPORT_DATE=$(date '+%Y-%m-%d' -d "$REPORT_DATE")
+fi
+
+###############################################################
+# Get list of dependencies repos from the dependencies_urls.csv
+###############################################################
+
+cd "$WORKSPACE"
+touch "repositories.txt"
+# Extract source column (assuming CSV is comma-separated)
+source_column=$(tail -n +3 "${WORKSPACE}/dashboards/dependencies_urls.csv" | cut -d ',' -f 2)
+
+# Filter out non-HTTP and non-HTTPS URLs using grep
+filtered_urls=$(echo "$source_column" | grep -E '^(http|https)://')
+
+# Save the filtered URLs to repo_urls.txt
+echo "$filtered_urls" > "repositories.txt"
+
+#########################################
+# Run dependencies checks on repositories
+#########################################
+
+# Install checks and dashboarding script, this should also install pytest-repo-health
+pip-sync -q edx-repo-health/requirements/base.txt
+pip install -q -e edx-repo-health
+
+# data destination folder setup
+
+METADATA_FILE_DIST="docs/checks_metadata.yaml"
+
+failed_repos=()
+
+OUTPUT_FILE_POSTFIX="_repo_health.yaml"
+
+# Git clone each dependency repo and run checks on it.
+# Repositories that cannot be cloned, have no history before REPORT_DATE, or
+# fail the checks twice are recorded in failed_repos and skipped.
+input="repositories.txt"
+while IFS= read -r line; do
+    # Always start from the workspace root: a `continue` below may leave the
+    # shell inside target-repo.
+    cd "$WORKSPACE"
+    # [[ =~ ]] uses POSIX extended regular expressions, which have no lazy
+    # quantifiers, so plain `+` is used. The character classes exclude "/",
+    # which keeps the org/repo split unambiguous.
+    if [[ "${line}" =~ ^(git@github\.com:|https://github\.com/)([a-zA-Z0-9_.-]+)/([a-zA-Z0-9_.-]+)$ ]]; then
+        ORG_NAME="${BASH_REMATCH[2]}"
+        # Drop a trailing ".git" (no-op when absent) as we need the proper name only
+        REPO_NAME="${BASH_REMATCH[3]%.git}"
+        FULL_NAME="${ORG_NAME}/${REPO_NAME}"
+    else
+        echo "Skipping <${line}>: Could not recognize as a GitHub URL in order to extract org and repo name."
+        continue
+    fi
+
+    if [[ "${REPO_NAME}" = "edx-repo-health" ]]; then
+        echo "Skipping <${line}>: edx-repo-health"
+        continue
+    fi
+
+    if [[ -n "${ONLY_CHECK_THIS_REPOSITORY}" && "${FULL_NAME}" != "${ONLY_CHECK_THIS_REPOSITORY}" ]]; then
+        echo "Skipping <${line}>: ONLY_CHECK_THIS_REPOSITORY was set, and does not match"
+        continue
+    fi
+
+    echo "Processing repo: ${FULL_NAME}"
+
+    # Clone into a fixed directory, authenticating https URLs with GITHUB_TOKEN
+    rm -rf target-repo
+    if ! git clone -- "${line/https:\/\//https:\/\/$GITHUB_TOKEN@}" target-repo; then
+        failed_repos+=("$FULL_NAME")
+        continue
+    fi
+
+    echo "Cloned repo: ${FULL_NAME}"
+    cd target-repo
+    echo "Stepping into target-repo"
+    # If the REPORT_DATE variable is set and not an empty string.
+    if [[ -n $REPORT_DATE ]]; then
+        # If a specific date is given for report
+        FIRST_COMMIT=$(git log --reverse --format="format:%ci" | sed -n 1p)
+        # Both sides are YYYY-MM-DD, so a string comparison orders them by date
+        if [[ $REPORT_DATE > ${FIRST_COMMIT:0:10} ]]; then
+            # Use HEAD (the default branch of the fresh clone) rather than a
+            # hard-coded "master": repositories whose default branch has another
+            # name would otherwise fail here and abort the whole job.
+            git checkout "$(git rev-list -n 1 --before="${REPORT_DATE} 00:00" HEAD)"
+        else
+            echo "${REPO_NAME} doesn't have any commits prior to ${REPORT_DATE}"
+            failed_repos+=("$FULL_NAME")
+            continue
+        fi
+    fi
+
+    cd "$WORKSPACE"
+    DEPENDENCIES_DATA_DIR="dependencies_health_data"
+    # make sure destination folder exists
+    mkdir -p "$DEPENDENCIES_DATA_DIR"
+
+    OUTPUT_FILE_NAME="${REPO_NAME}${OUTPUT_FILE_POSTFIX}"
+
+    # Run only the py_dependency_health checks against target-repo and write
+    # the results to this repository's data file.
+    DEPENDENCIES_HEALTH_COMMAND() {
+        pytest -m py_dependency_health --repo-health \
+            --repo-health-path "edx-repo-health/repo_health" \
+            --repo-path "target-repo" \
+            --repo-health-metadata "${METADATA_FILE_DIST}" \
+            --output-path "${DEPENDENCIES_DATA_DIR}/${OUTPUT_FILE_NAME}" \
+            -o log_cli=true --exitfirst --noconftest -v -c /dev/null
+    }
+
+    # Give the checks one retry; only two consecutive failures mark the repo as failed
+    if ! DEPENDENCIES_HEALTH_COMMAND && ! DEPENDENCIES_HEALTH_COMMAND; then
+        failed_repos+=("$FULL_NAME")
+        continue
+    fi
+
+done < "$input"
+
+##############################
+# Recalculate aggregated data.
+##############################
+
+# Recalculate the aggregate dashboard from the per-repository data files and,
+# when running for master without a REPORT_DATE, commit and push the result.
+
+# Join the failed repo names with commas. IFS stays set to "," for the rest of
+# the script, so later "${failed_repos[*]}" expansions are comma-joined too.
+IFS=,
+failed_repo_names="${failed_repos[*]}"
+
+echo "Pushing data"
+cd "${WORKSPACE}/dependencies_health_data"
+repo_health_dashboard --data-dir . --configuration "${WORKSPACE}/edx-repo-health/repo_health_dashboard/dependencies_configuration.yaml" \
+    --output-csv "${WORKSPACE}/dashboards/dashboard" --dashboard-name "py_dependency_health"
+
+cd "${WORKSPACE}"
+# Only commit the data if running with master and no REPORT_DATE is set.
+if [[ ${EDX_REPO_HEALTH_BRANCH} == 'master' && -z ${REPORT_DATE} ]]; then
+    ###########################################
+    # Commit files and push to repo-health-data
+    ###########################################
+    echo "Commit new files and push to master..."
+
+    commit_message="chore: Update repo health data files"
+
+    cd "${WORKSPACE}"
+
+    if [[ ${#failed_repos[@]} -ne 0 ]]; then
+        # $'\n' is a real newline; "\n" inside double quotes would end up in
+        # the commit message as a literal backslash-n. The blank line separates
+        # the commit subject from its body.
+        commit_message+=$'\n\n'"Following repos failed repo health checks"$'\n'" ${failed_repo_names}"
+
+        for full_name in "${failed_repos[@]}"; do
+            # failed_repos holds "org/repo", but data files are named after the
+            # repository only, so strip the org prefix to get the real path.
+            OUTPUT_FILE_NAME="${full_name##*/}${OUTPUT_FILE_POSTFIX}"
+            data_file="${WORKSPACE}/dependencies_health_data/${OUTPUT_FILE_NAME}"
+            echo "reverting repo health data for ${OUTPUT_FILE_NAME}"
+            if git ls-files --error-unmatch -- "${data_file}" > /dev/null 2>&1; then
+                # Tracked file: restore the last committed data
+                git checkout -- "${data_file}"
+            else
+                # Never committed: there is nothing to restore, so just make
+                # sure no output from the failed run gets committed.
+                rm -f -- "${data_file}"
+            fi
+        done
+    fi
+
+    git checkout master
+    # Stage first and compare the index against HEAD: a plain working-tree
+    # diff ignores untracked files, so newly created data files would never
+    # be detected as a change.
+    git add dashboards
+    git add dependencies_health_data
+    if git diff-index --quiet --cached HEAD; then
+        # Nothing staged under dashboards/ or dependencies_health_data/
+        echo "No changes to commit"
+    else
+        # Staged changes found
+        git config --global user.name "Repo Health Bot"
+        git config --global user.email "${GITHUB_USER_EMAIL}"
+        git commit -m "${commit_message}"
+        git push origin master
+    fi
+fi
+
+if [[ ${#failed_repos[@]} -ne 0 ]]; then
+    echo
+    echo
+    echo "TLDR Runbook(More detailed runbook: https://openedx.atlassian.net/wiki/spaces/AT/pages/3229057351/Repo+Health+Runbook ):"
+    echo "  To resolve, search the console output for 'ERRORS' (without the quotes), or search for any"
+    echo "  of the failed repo names listed below."
+    echo "The following repositories failed while executing pytest dependencies-health scripts causing the job to fail:"
+    echo
+    echo "    ${failed_repos[*]}"
+    echo
+    echo
+    exit 1
+fi