From 2088af68c90a7ed2f1c35a8d3adcf097618e15c9 Mon Sep 17 00:00:00 2001 From: Jeremy Bowman Date: Fri, 22 Dec 2023 11:44:58 -0500 Subject: [PATCH] feat: Improved streamlit & console dashboards Improved the streamlit version of the dashboard to be essentially a superset of the console dashboard, with a shared configuration. A major documentation update will be needed soon, but for now I'm getting this up for people to start trying out. --- Makefile | 20 +++- .../console_dashboard_config.yaml | 17 ++++ requirements/constraints.txt | 3 + requirements/dev.in | 4 +- requirements/dev.txt | 95 ++++++++++++++++++- requirements/doc.txt | 6 +- requirements/pip-tools.txt | 6 +- scripts/console_dashboard.py | 3 +- scripts/streamlit_dashboard.py | 89 +++++++++++++++-- 9 files changed, 223 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index bfb79939..8258503f 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,10 @@ .DEFAULT_GOAL := help +DASHBOARD_CONFIG_PATH ?= repo_health_dashboard/console_dashboard_config.yaml +REPO_HEALTH_DATA_PATH ?= ../repo-health-data +SQLITE_FILE_PATH = $(REPO_HEALTH_DATA_PATH)/dashboards/dashboard.sqlite3 + # For opening files in a browser. 
Use like: $(BROWSER)relative/path/to/file.html BROWSER := python -m webbrowser file://$(CURDIR)/ @@ -10,6 +14,20 @@ help: ## display this help message @echo "Please use \`make ' where is one of" @awk -F ':.*?## ' '/^[a-zA-Z]/ && NF==2 {printf "\033[36m %-25s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) | sort +generate_sqlite: ## generate the repo health SQLite data file from a local checkout of the repo-health-data repository + rm -f $(REPO_HEALTH_DATA_PATH)/dashboards/dashboard.sqlite3 + repo_health_dashboard --data-dir $(REPO_HEALTH_DATA_PATH)/individual_repo_data --configuration "repo_health_dashboard/configuration.yaml" --data-life-time=30 --output-sqlite "${REPO_HEALTH_DATA_PATH}/dashboards/dashboard" + +streamlit: ## launch the repo health dashboard as a Streamlit app in a web browser window + streamlit run scripts/streamlit_dashboard.py $(SQLITE_FILE_PATH) $(DASHBOARD_CONFIG_PATH) + +console: ## display the console health dashboard, filter to specific squad(s) like 'make squad="arbi-bom fed-bom" console' +ifdef squad + @python scripts/console_dashboard.py $(SQLITE_FILE_PATH) --configuration=$(DASHBOARD_CONFIG_PATH) --squad='$(squad)' +else + @python scripts/console_dashboard.py $(SQLITE_FILE_PATH) --configuration=$(DASHBOARD_CONFIG_PATH) +endif + clean: ## remove generated byte code, coverage reports, and build artifacts find . -name '__pycache__' -exec rm -rf {} + find . -name '*.pyc' -exec rm -f {} + @@ -19,7 +37,6 @@ clean: ## remove generated byte code, coverage reports, and build artifacts rm -fr dist/ rm -fr *.egg-info - docs: ## generate Sphinx HTML documentation, including API docs tox -v -e docs $(BROWSER)docs/_build/html/index.html @@ -50,6 +67,7 @@ requirements: ## install development environment requirements pip install -qr requirements/pip.txt pip install -qr requirements/pip-tools.txt pip-sync requirements/dev.txt requirements/private.* + pip install -e . 
test: clean ## run tests in the current virtualenv tox diff --git a/repo_health_dashboard/console_dashboard_config.yaml b/repo_health_dashboard/console_dashboard_config.yaml index 7c5a436a..07c747d1 100644 --- a/repo_health_dashboard/console_dashboard_config.yaml +++ b/repo_health_dashboard/console_dashboard_config.yaml @@ -23,14 +23,30 @@ tables: sql: SELECT repo_name, ownership_squad FROM dashboard_main WHERE exists__travis_yml = 1 description: We no longer use this service, the .travis.yml file can be safely deleted. If the repo was recently added (perhaps a fork) and actually uses Travis CI, please switch it to use GitHub Actions instead; Travis CI is poorly maintained and has had multiple security incidents leaking private credentials. +- title: Not Checking for GitHub Action Version Updates + sql: SELECT repo_name, ownership_squad FROM dashboard_main WHERE dependabot_has_ecosystem_github_actions = 0 + description: Dependabot should be configured to auto-generate pull requests to upgrade the GitHub Actions in use so security patches can be applied promptly. + - title: Still Use nose sql: SELECT repo_name, ownership_squad FROM dashboard_main WHERE requires_nose = 1 description: This test runner was abandoned several years ago, it should be replaced with pytest. +- title: Still Using Dependabot to Create JavaScript Dependency Upgrade PRs + sql: SELECT repo_name, ownership_squad FROM dashboard_main WHERE dependabot_has_ecosystem_npm = 1 + description: Renovate is better suited to fulfill this need, and should be configured if that has not already been done. If Renovate is already in use, then the extra PRs from Dependabot are redundant and should be turned off. 
+ +- title: Still Using Dependabot to Create Python Dependency Upgrade PRs + sql: SELECT repo_name, ownership_squad FROM dashboard_main WHERE dependabot_has_ecosystem_pip = 1 + description: The upgrade-python-requirements shared GitHub Actions workflow is better suited to fulfill this need, and should be configured if that has not already been done. If that is already in use, then the extra PRs from Dependabot are redundant and should be turned off. + - title: Missing Python 3.8 Classifier sql: SELECT repo_name, ownership_squad FROM dashboard_main WHERE setup_py_pypi_name IS NOT NULL AND setup_py_pypi_name != '' AND setup_py_py38_classifiers = 0 description: All our packages should work with Python 3.8, and setup.py should declare this explicitly when true to inform users and aid in future upgrades. +- title: Still Have Some Dependencies Not Tested With Django 3.2 + sql: SELECT repo_name, ownership_squad, django_packages_total_count - django_packages_upgraded_count FROM dashboard_main WHERE django_packages_total_count > django_packages_upgraded_count + description: The repository still seems to use some Python package versions which depend on Django but don't officially support Django 3.2 yet. They may in fact be working ok, but this is often a sign of poor maintenance and may cause problems with future upgrades. Work should be scheduled to either upgrade to newer releases, fix the upstream packages, or pursue removal of these dependencies as per https://openedx.atlassian.net/wiki/spaces/AC/pages/3036972032/Handling+Outdated+Dependencies . + # Use shorter and/or more readable column titles for these check names aliases: dependabot_alerts_critical_severity: Critical @@ -40,5 +56,6 @@ aliases: dependabot_alerts_total_count: Total dependabot_alerts_incomplete_results: Incomplete? 
dependencies_github_list: List of packages + "django_packages_total_count - django_packages_upgraded_count": Problematic dependency count repo_name: Repository ownership_squad: Squad diff --git a/requirements/constraints.txt b/requirements/constraints.txt index 491ee8b7..bc86ad01 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -12,6 +12,9 @@ # This file contains all common constraints for edx-repos -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt +# Streamlit started declaring this constraint until newer versions can be tested, needed to use Streamlit 1.29.0 +importlib-metadata<7 + # greater version breaking test. packaging==21.3 diff --git a/requirements/dev.in b/requirements/dev.in index e30b263d..3145bc91 100644 --- a/requirements/dev.in +++ b/requirements/dev.in @@ -3,7 +3,9 @@ -r pip-tools.txt # pip-tools and its dependencies, for managing requirements files -r quality.txt # Core and quality check dependencies --r ci.txt # tox and related dependencies +-r ci.txt # tox and related dependencies diff-cover # Changeset diff test coverage rich # Fancy formatting for console dashboards +streamlit # Library used to build the interactive web dashboard +streamlit-aggrid # Streamlit extension for the AgGrid data grid widget diff --git a/requirements/dev.txt b/requirements/dev.txt index efa957d9..15f05695 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -13,6 +13,8 @@ aiosignal==1.3.1 # via # -r requirements/quality.txt # aiohttp +altair==5.2.0 + # via streamlit asgiref==3.7.2 # via # -r requirements/quality.txt @@ -30,6 +32,12 @@ attrs==23.1.0 # via # -r requirements/quality.txt # aiohttp + # jsonschema + # referencing +backports-zoneinfo==0.2.1 + # via tzlocal +blinker==1.7.0 + # via streamlit build==1.0.3 # via # -r requirements/pip-tools.txt @@ -39,6 +47,7 @@ cachetools==5.3.2 # -r requirements/ci.txt # -r requirements/quality.txt # google-auth + # streamlit # tox 
certifi==2023.11.17 # via @@ -61,6 +70,7 @@ click==8.1.7 # code-annotations # edx-lint # pip-tools + # streamlit click-log==0.4.0 # via # -r requirements/quality.txt @@ -120,6 +130,7 @@ gitpython==3.1.40 # via # -r requirements/quality.txt # pytest-repo-health + # streamlit google-auth==2.25.2 # via # -r requirements/quality.txt @@ -138,10 +149,16 @@ idna==3.6 # -r requirements/quality.txt # requests # yarl -importlib-metadata==7.0.0 +importlib-metadata==6.11.0 # via + # -c requirements/constraints.txt # -r requirements/pip-tools.txt # build + # streamlit +importlib-resources==6.1.1 + # via + # jsonschema + # jsonschema-specifications iniconfig==2.0.0 # via # -r requirements/quality.txt @@ -153,8 +170,14 @@ isort==5.13.2 jinja2==3.1.2 # via # -r requirements/quality.txt + # altair # code-annotations # diff-cover + # pydeck +jsonschema==4.20.0 + # via altair +jsonschema-specifications==2023.11.2 + # via jsonschema markdown-it-py==3.0.0 # via rich markupsafe==2.1.3 @@ -172,6 +195,13 @@ multidict==6.0.4 # -r requirements/quality.txt # aiohttp # yarl +numpy==1.24.4 + # via + # altair + # pandas + # pyarrow + # pydeck + # streamlit oauthlib==3.2.2 # via # -r requirements/quality.txt @@ -182,16 +212,27 @@ packaging==21.3 # -r requirements/ci.txt # -r requirements/pip-tools.txt # -r requirements/quality.txt + # altair # build # pyproject-api # pytest + # streamlit # tox +pandas==2.0.3 + # via + # altair + # streamlit + # streamlit-aggrid pbr==6.0.0 # via # -r requirements/quality.txt # stevedore +pillow==10.1.0 + # via streamlit pip-tools==7.3.0 # via -r requirements/pip-tools.txt +pkgutil-resolve-name==1.3.10 + # via jsonschema platformdirs==4.1.0 # via # -r requirements/ci.txt @@ -206,6 +247,10 @@ pluggy==1.3.0 # diff-cover # pytest # tox +protobuf==4.25.1 + # via streamlit +pyarrow==14.0.2 + # via streamlit pyasn1==0.5.1 # via # -r requirements/quality.txt @@ -217,6 +262,8 @@ pyasn1-modules==0.3.0 # google-auth pycodestyle==2.11.1 # via -r requirements/quality.txt 
+pydeck==0.8.1b0 + # via streamlit pydocstyle==6.3.0 # via -r requirements/quality.txt pygments==2.17.2 @@ -276,6 +323,12 @@ pytest-cov==4.1.0 # via -r requirements/quality.txt pytest-repo-health==3.0.2 # via -r requirements/quality.txt +python-dateutil==2.8.2 + # via + # pandas + # streamlit +python-decouple==3.8 + # via streamlit-aggrid python-slugify==8.0.1 # via # -r requirements/quality.txt @@ -284,17 +337,23 @@ pytz==2023.3.post1 # via # -r requirements/quality.txt # django + # pandas pyyaml==6.0.1 # via # -r requirements/quality.txt # code-annotations # pytest-repo-health # responses +referencing==0.32.0 + # via + # jsonschema + # jsonschema-specifications requests==2.31.0 # via # -r requirements/quality.txt # requests-oauthlib # responses + # streamlit requests-oauthlib==1.3.1 # via # -r requirements/quality.txt @@ -302,7 +361,13 @@ requests-oauthlib==1.3.1 responses==0.24.1 # via -r requirements/quality.txt rich==13.7.0 - # via -r requirements/dev.in + # via + # -r requirements/dev.in + # streamlit +rpds-py==0.15.2 + # via + # jsonschema + # referencing rsa==4.9 # via # -r requirements/quality.txt @@ -311,6 +376,7 @@ six==1.16.0 # via # -r requirements/quality.txt # edx-lint + # python-dateutil smmap==5.0.1 # via # -r requirements/quality.txt @@ -327,12 +393,22 @@ stevedore==5.1.0 # via # -r requirements/quality.txt # code-annotations +streamlit==1.29.0 + # via + # -r requirements/dev.in + # streamlit-aggrid +streamlit-aggrid==0.3.4.post3 + # via -r requirements/dev.in +tenacity==8.2.3 + # via streamlit text-unidecode==1.3 # via # -r requirements/quality.txt # python-slugify toml==0.10.2 - # via -r requirements/quality.txt + # via + # -r requirements/quality.txt + # streamlit tomli==2.0.1 # via # -r requirements/ci.txt @@ -350,20 +426,32 @@ tomlkit==0.12.3 # via # -r requirements/quality.txt # pylint +toolz==0.12.0 + # via altair +tornado==6.4 + # via streamlit tox==4.0.0 # via -r requirements/ci.txt typing-extensions==4.9.0 # via # -r 
requirements/quality.txt + # altair # asgiref # astroid # pylint # rich + # streamlit +tzdata==2023.3 + # via pandas +tzlocal==5.2 + # via streamlit urllib3==2.1.0 # via # -r requirements/quality.txt # requests # responses +validators==0.22.0 + # via streamlit virtualenv==20.25.0 # via # -r requirements/ci.txt @@ -380,6 +468,7 @@ zipp==3.17.0 # via # -r requirements/pip-tools.txt # importlib-metadata + # importlib-resources # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/requirements/doc.txt b/requirements/doc.txt index 04536e35..81d47ef7 100644 --- a/requirements/doc.txt +++ b/requirements/doc.txt @@ -98,8 +98,10 @@ idna==3.6 # yarl imagesize==1.4.1 # via sphinx -importlib-metadata==7.0.0 - # via sphinx +importlib-metadata==6.11.0 + # via + # -c requirements/constraints.txt + # sphinx iniconfig==2.0.0 # via # -r requirements/test.txt diff --git a/requirements/pip-tools.txt b/requirements/pip-tools.txt index f5f06089..1ef147cf 100644 --- a/requirements/pip-tools.txt +++ b/requirements/pip-tools.txt @@ -8,8 +8,10 @@ build==1.0.3 # via pip-tools click==8.1.7 # via pip-tools -importlib-metadata==7.0.0 - # via build +importlib-metadata==6.11.0 + # via + # -c requirements/constraints.txt + # build packaging==21.3 # via # -c requirements/constraints.txt diff --git a/scripts/console_dashboard.py b/scripts/console_dashboard.py index a6779fa4..ebcd54c6 100755 --- a/scripts/console_dashboard.py +++ b/scripts/console_dashboard.py @@ -36,7 +36,8 @@ def print_table(console: Console, title: str, cursor: sqlite3.Cursor, descriptio name = aliases.get(name, name) table.add_column(name) for row in cursor.fetchall(): - table.add_row(*row) + cells = [str(cell) for cell in row] + table.add_row(*cells) if table.row_count > 0: print("") console.print(table) diff --git a/scripts/streamlit_dashboard.py b/scripts/streamlit_dashboard.py index 3e597d04..c24ba94c 100644 --- a/scripts/streamlit_dashboard.py +++ b/scripts/streamlit_dashboard.py @@ 
def prepare_query(sql: str, squads: "list[str]") -> str:
    """
    Build the final SQL for one dashboard table from its configured base query.

    The result is always sorted first by squad and then by repo name.  When
    any squads are selected, an extra ``AND ownership_squad IN (...)``
    condition restricts the rows to those squads.

    Arguments:
        sql: Base query from the dashboard configuration.  The squad filter is
            appended with ``AND`` and the sort with ``ORDER BY``, so the query
            must end inside a ``WHERE`` clause with nothing after it.
        squads: Names of the squads to show; an empty list means no filtering.

    Returns:
        The adjusted SQL statement.
    """
    if squads:
        # The names are embedded as SQL string literals, so every single quote
        # must be doubled; otherwise a squad such as "o'neil" would end the
        # literal early and produce a broken (or altered) statement.
        literals = ", ".join("'" + squad.replace("'", "''") + "'" for squad in squads)
        sql += f" AND ownership_squad IN ({literals})"
    return sql + " ORDER BY ownership_squad, repo_name"


def add_table(title: str, df: pd.DataFrame, description: str, aliases: "dict[str, str]") -> None:
    """
    Add a table to the dashboard; omit it if there is nothing to display in it.

    Arguments:
        title: Heading shown above the table.
        df: Query results to display.
        description: Explanatory text written below the table.
        aliases: Mapping of column name to the label to show for that column;
            columns without an entry keep their own name.  Any empty value
            (``{}``, ``[]``, ``None``) means "no aliases".
    """
    if df.size == 0:
        # Nothing to show, so skip the heading, table and description entirely.
        return
    # The calling script falls back to ``[]`` when the configuration has no
    # "aliases" section; normalise that to an empty mapping before using .get().
    labels = aliases or {}
    column_config = {name: labels.get(name, name) for name in df.columns}
    st.subheader(title)
    st.dataframe(
        df,
        column_config=column_config,
        hide_index=True,
    )
    st.write(description)
for _ in squads)})", conn, params=squads) +else: + df = pd.read_sql("SELECT * FROM dashboard_main", conn) builder = GridOptionsBuilder.from_dataframe(df) builder.configure_side_bar() options = builder.build()